Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
bc5d7f75
Commit
bc5d7f75
authored
Feb 15, 2019
by
Paul
Browse files
Merge from develop
parents
47c0854d
a5b0afa0
Changes
337
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
511 additions
and
217 deletions
+511
-217
src/targets/cpu/include/migraphx/cpu/lowering.hpp
src/targets/cpu/include/migraphx/cpu/lowering.hpp
+21
-0
src/targets/cpu/include/migraphx/cpu/target.hpp
src/targets/cpu/include/migraphx/cpu/target.hpp
+23
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+190
-55
src/targets/cpu/target.cpp
src/targets/cpu/target.cpp
+15
-9
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+34
-14
src/targets/gpu/abs.cpp
src/targets/gpu/abs.cpp
+39
-0
src/targets/gpu/add.cpp
src/targets/gpu/add.cpp
+0
-55
src/targets/gpu/batchnorm.cpp
src/targets/gpu/batchnorm.cpp
+10
-9
src/targets/gpu/concat.cpp
src/targets/gpu/concat.cpp
+9
-9
src/targets/gpu/contiguous.cpp
src/targets/gpu/contiguous.cpp
+8
-8
src/targets/gpu/convolution.cpp
src/targets/gpu/convolution.cpp
+43
-29
src/targets/gpu/device/acos.cpp
src/targets/gpu/device/acos.cpp
+18
-0
src/targets/gpu/device/add.cpp
src/targets/gpu/device/add.cpp
+6
-6
src/targets/gpu/device/add_relu.cpp
src/targets/gpu/device/add_relu.cpp
+6
-6
src/targets/gpu/device/asin.cpp
src/targets/gpu/device/asin.cpp
+18
-0
src/targets/gpu/device/atan.cpp
src/targets/gpu/device/atan.cpp
+18
-0
src/targets/gpu/device/concat.cpp
src/targets/gpu/device/concat.cpp
+11
-11
src/targets/gpu/device/contiguous.cpp
src/targets/gpu/device/contiguous.cpp
+6
-6
src/targets/gpu/device/cos.cpp
src/targets/gpu/device/cos.cpp
+18
-0
src/targets/gpu/device/cosh.cpp
src/targets/gpu/device/cosh.cpp
+18
-0
No files found.
src/targets/cpu/include/migraph/cpu/lowering.hpp
→
src/targets/cpu/include/migraph
x
/cpu/lowering.hpp
View file @
bc5d7f75
#ifndef MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
#ifndef MIGRAPH
X
_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPH
X
_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <migraph/program.hpp>
#include <migraph/config.hpp>
#include <migraph
x
/program.hpp>
#include <migraph
x
/config.hpp>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
cpu
{
struct
lowering
...
...
@@ -15,7 +15,7 @@ struct lowering
};
}
// namespace cpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
#endif
src/targets/cpu/include/migraphx/cpu/target.hpp
0 → 100644
View file @
bc5d7f75
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#include <migraphx/program.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
cpu
{
// Target descriptor for the CPU backend.
//
// name() and get_passes() are only declared here; their definitions are not
// part of this header.
struct target
{
    // Name identifying this target.
    std::string name() const;

    // Passes to apply when compiling a program for this target.
    std::vector<pass> get_passes(migraphx::context& ctx) const;

    // Returns a freshly default-constructed context.
    // NOTE(review): the unqualified `context` presumably resolves to the
    // cpu-specific context from <migraphx/cpu/context.hpp>, which is then
    // converted to migraphx::context - confirm.
    migraphx::context get_context() const { return context{}; }
};
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/cpu/lowering.cpp
View file @
bc5d7f75
#include <migraph/cpu/lowering.hpp>
#include <migraph/instruction.hpp>
#include <migraph/dfor.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/cpu/gemm.hpp>
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
#include <migraphx/cpu/gemm.hpp>
#include <unordered_map>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
cpu
{
template
<
typename
T
>
...
...
@@ -19,6 +20,14 @@ T zero(const T&)
return
T
(
0
);
}
// Pass a value through, converting it to the signed counterpart of its type
// when T is an integral type. Non-integral types (e.g. floating point) are
// returned with their type unchanged.
template <class T>
auto make_signed(T x) -> typename std::conditional_t<std::is_integral<T>::value,
                                                     std::make_signed<T>,
                                                     std::enable_if<true, T>>::type
{
    return x;
}
//
// cpu implementation of batch norm for inference
//
...
...
@@ -64,7 +73,7 @@ struct cpu_batch_norm_inference
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_variance
,
arg_gamma
,
arg_bias
)(
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
par_
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
[
&
](
std
::
size_t
n
,
std
::
size_t
c
,
std
::
size_t
h
,
std
::
size_t
w
)
{
assert
((
variance
(
c
)
+
epsilon
)
>
0
);
result
(
n
,
c
,
h
,
w
)
=
gamma
(
c
)
*
(
buffer
(
n
,
c
,
h
,
w
)
-
mean
(
c
))
/
...
...
@@ -79,7 +88,7 @@ struct cpu_batch_norm_inference
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_mean
,
arg_gamma
,
arg_bias
)(
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
par_
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
[
&
](
std
::
size_t
n
,
std
::
size_t
c
,
std
::
size_t
h
,
std
::
size_t
w
)
{
assert
((
variance
(
c
,
h
,
w
)
+
epsilon
)
>
0
);
result
(
n
,
c
,
h
,
w
)
=
gamma
(
c
,
h
,
w
)
*
...
...
@@ -94,6 +103,43 @@ struct cpu_batch_norm_inference
}
};
// CPU implementation of local response normalization (LRN) across channels.
//
// For every element, the squares of the inputs in a window of neighbouring
// channels are summed, multiplied by alpha / size, offset by bias, raised to
// the power -beta and finally multiplied with the input element.
struct cpu_lrn
{
    op::lrn op;
    std::string name() const { return "cpu::lrn"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

    // Evaluates the operator on args[0], which is indexed with four
    // coordinates (batch, channel, height, width), and returns a newly
    // allocated result of shape output_shape.
    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            int n_batch         = output_shape.lens()[0];
            int channels        = output_shape.lens()[1];
            int height          = output_shape.lens()[2];
            int width           = output_shape.lens()[3];
            float alphaoverarea = op.alpha / op.size;
            // The window for channel c is [c - radius_lower, c + radius_upper),
            // i.e. floor((size - 1) / 2) channels below c and
            // ceil((size - 1) / 2) channels above it, clamped to the tensor.
            int radius_lower = (op.size - 1) / 2;
            int radius_upper = op.size / 2 + 1;

            par_dfor(n_batch, height, width)([&](int b, int h, int w) {
                dfor(channels)([&](int c) {
                    // The accumulator must start from zero for every channel;
                    // carrying it over would mix the previous channel's
                    // (already exponentiated) scale into this one.
                    float scale = 0;
                    auto start  = (c - radius_lower) < 0 ? 0 : (c - radius_lower);
                    auto end    = (c + radius_upper) > channels ? channels : (c + radius_upper);
                    for(auto k = start; k < end; ++k)
                    {
                        scale += std::pow(input(b, k, h, w), 2);
                    }
                    scale *= alphaoverarea;
                    scale += op.bias;
                    scale              = std::pow(scale, -op.beta);
                    output(b, c, h, w) = input(b, c, h, w) * scale;
                });
            });
        });
        return result;
    }
};
struct
cpu_convolution
{
op
::
convolution
op
;
...
...
@@ -104,28 +150,33 @@ struct cpu_convolution
{
argument
result
{
output_shape
};
visit_all
(
result
,
args
[
0
],
args
[
1
])([
&
](
auto
output
,
auto
input
,
auto
weights
)
{
auto
in_h
=
input
.
get_shape
().
lens
()[
2
];
auto
in_w
=
input
.
get_shape
().
lens
()[
3
];
auto
wei_c
=
weights
.
get_shape
().
lens
()[
1
];
auto
wei_h
=
weights
.
get_shape
().
lens
()[
2
];
auto
wei_w
=
weights
.
get_shape
().
lens
()[
3
];
dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
auto
in
=
input
.
get_shape
().
lens
();
auto
in_h
=
in
[
2
];
auto
in_w
=
in
[
3
];
auto
wei
=
weights
.
get_shape
().
lens
();
auto
wei_n
=
wei
[
0
];
auto
wei_c
=
wei
[
1
];
auto
wei_h
=
wei
[
2
];
auto
wei_w
=
wei
[
3
];
par_dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
int
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
int
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
int
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
int
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
int
group_id
=
w
/
(
wei_n
/
op
.
group
);
double
acc
=
0
;
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
const
int
in_x
=
start_x
+
x
;
const
int
in_y
=
start_y
+
y
;
const
int
in_x
=
start_x
+
x
;
const
int
in_y
=
start_y
+
y
;
const
int
in_ch
=
group_id
*
wei_c
+
k
;
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
{
acc
+=
input
(
o
,
k
,
in_x
,
in_y
)
*
weights
(
w
,
k
,
x
,
y
);
acc
+=
input
(
o
,
in_ch
,
in_x
,
in_y
)
*
weights
(
w
,
k
,
x
,
y
);
}
});
output
(
o
,
w
,
i
,
j
)
=
acc
;
...
...
@@ -158,7 +209,8 @@ struct cpu_im2col
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
int
kdiv2_h
,
kdiv2_w
;
int
kdiv2_h
;
int
kdiv2_w
;
kdiv2_h
=
kernel_h
/
2
;
kdiv2_w
=
kernel_w
/
2
;
// calculate output sizes
...
...
@@ -231,10 +283,10 @@ struct cpu_pooling
auto
in_h
=
input
.
get_shape
().
lens
()[
2
];
auto
in_w
=
input
.
get_shape
().
lens
()[
3
];
dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
par_
dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
int
start_x0
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
int
start_y0
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
...
...
@@ -271,14 +323,33 @@ struct cpu_contiguous
std
::
string
name
()
const
{
return
"cpu::contiguous"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
return
op
.
compute
(
output_shape
,
std
::
move
(
args
));
}
};
// CPU implementation of the pad operator.
//
// The output is first filled with op.value; afterwards every input element is
// copied to the output position obtained by adding the leading entries of
// op.pads to its index.
struct cpu_pad
{
    op::pad op;
    // Each lowered operator reports its own name; this one previously reported
    // the name of the contiguous operator it was copied from.
    std::string name() const { return "cpu::pad"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

    // Returns a newly allocated result of shape output_shape holding args[0]
    // surrounded by the padding value.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        assert(output_shape.standard());
        argument result{output_shape};
        // Pre-fill everything with the padding value; the copy below only
        // overwrites the region covered by the input.
        result.visit([&](auto output) { std::fill(output.begin(), output.end(), op.value); });

        visit_all(result, args[0])([&](auto output, auto input) {
            shape_for_each(input.get_shape(), [&](const auto& idx) {
                // Shift the input index by the per-dimension leading pad.
                std::vector<std::size_t> new_idx(idx.size());
                std::transform(idx.begin(),
                               idx.end(),
                               op.pads.begin(),
                               new_idx.begin(),
                               [](auto i, auto j) { return i + j; });
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
            });
        });

        return result;
    }
};
...
...
@@ -290,24 +361,7 @@ struct cpu_concat
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
std
::
vector
<
std
::
size_t
>
coffsets
=
op
.
compute_offsets
(
output_shape
,
args
);
for
(
std
::
size_t
l
=
0
;
l
<
args
.
size
();
l
++
)
{
auto
argl
=
args
[
l
];
std
::
size_t
nelements
=
argl
.
get_shape
().
elements
();
visit_all
(
result
,
argl
)([
&
](
auto
output
,
auto
input
)
{
auto
slice_shape
=
shape
{
output_shape
.
type
(),
input
.
get_shape
().
lens
(),
output_shape
.
strides
()};
auto
slice
=
make_view
(
slice_shape
,
output
.
data
()
+
coffsets
[
l
]);
// cppcheck-suppress useStlAlgorithm
for
(
std
::
size_t
i
=
0
;
i
<
nelements
;
i
++
)
{
slice
[
i
]
=
input
[
i
];
}
});
}
return
result
;
return
op
.
compute
(
output_shape
,
std
::
move
(
args
));
}
};
...
...
@@ -325,6 +379,18 @@ struct cpu_gemm
}
};
// CPU lowering of the gather operator. Shape inference and evaluation are both
// forwarded to the wrapped op::gather.
struct cpu_gather
{
    op::gather op;

    std::string name() const { return "cpu::gather"; }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.compute_shape(inputs);
    }

    // The context argument is unused; the argument vector is moved into the
    // wrapped operator.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        return op.compute(output_shape, std::move(args));
    }
};
struct
identity_op
{
std
::
string
name
()
const
{
return
"cpu::identity"
;
}
...
...
@@ -339,7 +405,7 @@ struct abs_op
std
::
string
name
()
const
{
return
"cpu::abs"
;
}
auto
fcn
()
const
{
return
[](
auto
x
)
{
return
std
::
abs
(
x
);
};
return
[](
auto
x
)
{
return
std
::
abs
(
make_signed
(
x
)
);
};
}
};
...
...
@@ -352,6 +418,15 @@ struct exp_op
}
};
// Element-wise natural logarithm functor.
struct log_op
{
    std::string name() const { return "cpu::log"; }

    // Returns a callable that applies std::log to its argument.
    auto fcn() const
    {
        return [](auto value) { return std::log(value); };
    }
};
struct
sin_op
{
std
::
string
name
()
const
{
return
"cpu::sin"
;
}
...
...
@@ -406,6 +481,24 @@ struct atan_op
}
};
// Element-wise hyperbolic sine functor.
struct sinh_op
{
    std::string name() const { return "cpu::sinh"; }

    // Returns a callable that applies std::sinh to its argument.
    auto fcn() const
    {
        return [](auto value) { return std::sinh(value); };
    }
};
// Element-wise hyperbolic cosine functor.
struct cosh_op
{
    std::string name() const { return "cpu::cosh"; }

    // Returns a callable that applies std::cosh to its argument.
    auto fcn() const
    {
        return [](auto value) { return std::cosh(value); };
    }
};
struct
tanh_op
{
std
::
string
name
()
const
{
return
"cpu::tanh"
;
}
...
...
@@ -453,6 +546,17 @@ struct leaky_relu_op
}
};
struct
elu_op
{
op
::
elu
op
;
std
::
string
name
()
const
{
return
"cpu::elu"
;
}
auto
fcn
()
const
{
auto
&
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
a
*
std
::
expm1
(
x
);
};
}
};
template
<
typename
Op
>
struct
cpu_unary
{
...
...
@@ -545,6 +649,24 @@ struct div_op
}
};
// Element-wise binary maximum functor.
struct max_op
{
    std::string name() const { return "max"; }

    // Returns a callable yielding the larger of its two (same-typed) arguments.
    auto fcn() const
    {
        return [](auto lhs, auto rhs) { return std::max(lhs, rhs); };
    }
};
// Element-wise binary minimum functor.
struct min_op
{
    std::string name() const { return "min"; }

    // Returns a callable yielding the smaller of its two (same-typed) arguments.
    auto fcn() const
    {
        return [](auto lhs, auto rhs) { return std::min(lhs, rhs); };
    }
};
template
<
typename
Op
>
struct
cpu_binary
{
...
...
@@ -596,22 +718,35 @@ struct cpu_apply
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"contiguous"
]
=
extend_op
<
cpu_contiguous
,
op
::
contiguous
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"concat"
]
=
extend_op
<
cpu_concat
,
op
::
concat
>
();
apply_map
[
"gather"
]
=
extend_op
<
cpu_gather
,
op
::
gather
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"identity"
]
=
simple_op
<
cpu_unary
<
identity_op
>>
();
apply_map
[
"abs"
]
=
simple_op
<
cpu_unary
<
abs_op
>>
();
apply_map
[
"sinh"
]
=
simple_op
<
cpu_unary
<
sinh_op
>>
();
apply_map
[
"cosh"
]
=
simple_op
<
cpu_unary
<
cosh_op
>>
();
apply_map
[
"tanh"
]
=
simple_op
<
cpu_unary
<
tanh_op
>>
();
apply_map
[
"sigmoid"
]
=
simple_op
<
cpu_unary
<
sigmoid_op
>>
();
apply_map
[
"exp"
]
=
simple_op
<
cpu_unary
<
exp_op
>>
();
apply_map
[
"log"
]
=
simple_op
<
cpu_unary
<
log_op
>>
();
apply_map
[
"neg"
]
=
simple_op
<
cpu_unary
<
neg_op
>>
();
apply_map
[
"sin"
]
=
simple_op
<
cpu_unary
<
sin_op
>>
();
apply_map
[
"cos"
]
=
simple_op
<
cpu_unary
<
cos_op
>>
();
apply_map
[
"tan"
]
=
simple_op
<
cpu_unary
<
tan_op
>>
();
apply_map
[
"asin"
]
=
simple_op
<
cpu_unary
<
asin_op
>>
();
apply_map
[
"acos"
]
=
simple_op
<
cpu_unary
<
acos_op
>>
();
apply_map
[
"atan"
]
=
simple_op
<
cpu_unary
<
atan_op
>>
();
apply_map
[
"relu"
]
=
simple_op
<
cpu_unary
<
relu_op
>>
();
apply_map
[
"add"
]
=
simple_op
<
cpu_binary
<
add_op
>>
();
apply_map
[
"sub"
]
=
simple_op
<
cpu_binary
<
sub_op
>>
();
apply_map
[
"mul"
]
=
simple_op
<
cpu_binary
<
mul_op
>>
();
apply_map
[
"div"
]
=
simple_op
<
cpu_binary
<
div_op
>>
();
apply_map
[
"max"
]
=
simple_op
<
cpu_binary
<
max_op
>>
();
apply_map
[
"min"
]
=
simple_op
<
cpu_binary
<
min_op
>>
();
apply_map
[
"softmax"
]
=
simple_op
<
softmax2d
>
();
}
...
...
@@ -658,5 +793,5 @@ struct cpu_apply
// Entry point of the lowering pass: builds a cpu_apply helper for the given
// program and runs it.
void lowering::apply(program& p) const
{
    cpu_apply{&p}.apply();
}
}
// namespace cpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/cpu/target.cpp
View file @
bc5d7f75
#include <migraph/cpu/target.hpp>
#include <migraph/cpu/lowering.hpp>
#include <migraph/auto_contiguous.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/dead_code_elimination.hpp>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
cpu
{
// Name identifying the CPU target.
std::string target::name() const
{
    return "cpu";
}
std
::
vector
<
pass
>
target
::
get_passes
(
migraph
::
context
&
)
const
std
::
vector
<
pass
>
target
::
get_passes
(
migraph
x
::
context
&
)
const
{
return
{
auto_contiguous
{},
lowering
{}};
return
{
auto_contiguous
{},
rewrite_rnn
{},
dead_code_elimination
{},
lowering
{},
dead_code_elimination
{}};
}
}
// namespace cpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/CMakeLists.txt
View file @
bc5d7f75
...
...
@@ -10,20 +10,35 @@ if(NOT TARGET MIOpen)
message
(
SEND_ERROR
"Cant find miopen"
)
endif
()
add_library
(
migraph_device
add_library
(
migraph
x
_device
device/add.cpp
device/max.cpp
device/min.cpp
device/exp.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/add_relu.cpp
device/contiguous.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
device/gather.cpp
device/sub.cpp
)
set_target_properties
(
migraph_device PROPERTIES EXPORT_NAME device
)
rocm_clang_tidy_check
(
migraph_device
)
target_link_libraries
(
migraph_device migraph hip::device
)
target_include_directories
(
migraph_device PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
target_include_directories
(
migraph_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
set_target_properties
(
migraph
x
_device PROPERTIES EXPORT_NAME device
)
rocm_clang_tidy_check
(
migraph
x
_device
)
target_link_libraries
(
migraph
x
_device migraph
x
hip::device
-Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906
)
target_include_directories
(
migraph
x
_device PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
target_include_directories
(
migraph
x
_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
add_library
(
migraph_gpu
add_library
(
migraph
x
_gpu
eliminate_workspace.cpp
fuse_ops.cpp
hip.cpp
...
...
@@ -37,19 +52,24 @@ add_library(migraph_gpu
concat.cpp
relu.cpp
leaky_relu.cpp
add.cpp
mul.cpp
tanh.cpp
batchnorm.cpp
write_literals.cpp
rocblas.cpp
sigmoid.cpp
abs.cpp
elu.cpp
pad.cpp
gather.cpp
lrn.cpp
)
set_target_properties
(
migraph_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraph_gpu
)
target_link_libraries
(
migraph_gpu PUBLIC migraph MIOpen roc::rocblas
)
target_link_libraries
(
migraph_gpu PRIVATE migraph_device
)
set_target_properties
(
migraph
x
_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraph
x
_gpu
)
target_link_libraries
(
migraph
x
_gpu PUBLIC migraph
x
MIOpen roc::rocblas
)
target_link_libraries
(
migraph
x
_gpu PRIVATE migraph
x
_device
)
rocm_install_targets
(
TARGETS migraph_gpu migraph_device
TARGETS migraph
x
_gpu migraph
x
_device
INCLUDE
${
CMAKE_CURRENT_SOURCE_DIR
}
/include
)
...
...
src/targets/gpu/abs.cpp
0 → 100644
View file @
bc5d7f75
#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates the operand shapes via check_shapes (exactly two shapes, checked
// with not_broadcasted()) and returns the shape at index 1.
// NOTE(review): index 1 is presumably the pre-allocated output buffer, since
// compute() writes to and returns args[1] - confirm.
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).not_broadcasted();
    return inputs.at(1);
}
// Runs the MIOpen activation described by the member `ad` on args[0] and
// stores the result in args[1], which is also the returned argument.
// The status returned by miopenActivationForward is not inspected.
argument miopen_abs::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    // Blend factors handed to MIOpen by address.
    float alpha = 1;
    float beta  = 0;

    const auto& source = args[0];
    const auto& target = args[1];

    auto x_desc = make_tensor(source.get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
                            ad.get(),
                            &alpha,
                            x_desc.get(),
                            source.implicit(),
                            &beta,
                            y_desc.get(),
                            target.implicit());

    return target;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/add.cpp
deleted
100644 → 0
View file @
47c0854d
#include <migraph/gpu/add.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/config.hpp>
#include <migraph/gpu/miopen.hpp>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
gpu
{
// Requires exactly three shapes and returns the first one.
shape hip_add::compute_shape(const std::vector<shape>& inputs) const
{
    // A stricter variant of the check is kept here, disabled:
    // check_shapes{inputs, *this}.has(3).standard();
    check_shapes{inputs, *this}.has(3);
    return inputs.at(0);
}
// Calls device::add on the context's stream with args[2] as the result and
// args[0], args[1] as operands, then returns args[2].
argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const auto& sum = args[2];
    device::add(ctx.get_stream().get(), sum, args[0], args[1]);
    return sum;
}
// Requires exactly three shapes, checked with not_broadcasted(), and returns
// the first one.
shape miopen_add::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(3).not_broadcasted();
    return inputs.at(0);
}
// Adds args[0] and args[1] with miopenOpTensor (miopenTensorOpAdd), writing
// into args[2], which is also the returned argument.
// The status returned by miopenOpTensor is not inspected.
argument miopen_add::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    // alpha scales both operands, beta scales the destination; all three are
    // handed to MIOpen by address.
    float alpha = 1;
    float beta  = 0;

    const auto& lhs = args[0];
    const auto& rhs = args[1];
    const auto& out = args[2];

    auto a_desc = make_tensor(lhs.get_shape());
    auto b_desc = make_tensor(rhs.get_shape());
    auto c_desc = make_tensor(output_shape);
    miopenOpTensor(ctx.get_stream().get_miopen(),
                   miopenTensorOpAdd,
                   &alpha,
                   a_desc.get(),
                   lhs.implicit(),
                   &alpha,
                   b_desc.get(),
                   rhs.implicit(),
                   &beta,
                   c_desc.get(),
                   out.implicit());

    return out;
}
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
src/targets/gpu/batchnorm.cpp
View file @
bc5d7f75
#include <migraph/gpu/batchnorm.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/batchnorm.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
shape
miopen_batch_norm_inference
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
...
...
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
auto
y_desc
=
make_tensor
(
output_shape
);
auto
bn_desc
=
make_tensor
(
args
[
3
].
get_shape
());
float
alpha
=
1.0
,
beta
=
0.0
f
;
float
alpha
=
1.0
;
float
beta
=
0.0
f
;
miopenBatchNormalizationForwardInference
(
ctx
.
get_stream
().
get_miopen
(),
miopenBatchNormMode_t
(
op
.
bn_mode
),
...
...
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
}
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/concat.cpp
View file @
bc5d7f75
#include <migraph/gpu/concat.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph
x
/gpu/concat.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <migraph
x
/gpu/device/concat.hpp>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
shape
hip_concat
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
...
...
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
}
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/contiguous.cpp
View file @
bc5d7f75
#include <migraph/gpu/contiguous.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/contiguous.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
shape
miopen_contiguous
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
...
...
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
}
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/convolution.cpp
View file @
bc5d7f75
#include <migraph/gpu/convolution.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/convolution.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
shape
miopen_convolution
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
...
...
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
());
auto
y_desc
=
make_tensor
(
output_shape
);
float
alpha
=
1
,
beta
=
0
;
float
alpha
=
1
;
float
beta
=
0
;
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
x_desc
.
get
(),
...
...
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
shape
miopen_convolution
::
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
instruction_ref
>
inputs
)
std
::
vector
<
shape
>
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
inputs
[
0
]
->
get_shape
()
);
auto
w_desc
=
make_tensor
(
inputs
[
1
]
->
get_shape
()
);
auto
x_desc
=
make_tensor
(
inputs
[
0
]);
auto
w_desc
=
make_tensor
(
inputs
[
1
]);
auto
y_desc
=
make_tensor
(
output_shape
);
std
::
size_t
workspace_size
=
0
;
...
...
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
&
workspace_size
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x
=
to_gpu
(
generate_argument
(
inputs
[
0
]
->
get_shape
()
));
auto
w
=
to_gpu
(
generate_argument
(
inputs
[
1
]
->
get_shape
()
));
auto
x
=
to_gpu
(
generate_argument
(
inputs
[
0
]));
auto
w
=
to_gpu
(
generate_argument
(
inputs
[
1
]));
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
x_desc
.
get
(),
x
.
implicit
(),
w_desc
.
get
(),
w
.
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
y
.
implicit
(),
1
,
&
algo_count
,
&
perf
,
workspace
.
implicit
(),
workspace_size
,
false
);
algo
=
perf
.
fwd_algo
;
auto
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
x_desc
.
get
(),
x
.
implicit
(),
w_desc
.
get
(),
w
.
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
y
.
implicit
(),
1
,
&
algo_count
,
&
perf
,
workspace
.
implicit
(),
workspace_size
,
false
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"Find convolution failed"
);
handle
=
ctx
.
get_stream
().
get_miopen
();
algo
=
perf
.
fwd_algo
;
return
shape
{
shape
::
int8_type
,
{
perf
.
memory
}};
}
// Re-runs compile() when the MIOpen handle stored in `handle` differs from the
// handle of the given context's stream; otherwise does nothing. The shape
// returned by compile() is discarded.
void miopen_convolution::finalize(context& ctx,
                                  const shape& output_shape,
                                  std::vector<shape> inputs)
{
    const bool same_handle = (handle == ctx.get_stream().get_miopen());
    if(not same_handle)
    {
        // TODO: Check that workspace hasn't changed
        compile(ctx, output_shape, std::move(inputs));
    }
}
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/device/acos.cpp
0 → 100644
View file @
bc5d7f75
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise arc cosine: hands a functor applying ::acos to nary(), with
// `result` as the destination and `arg` as the operand.
// NOTE(review): to_hip_type presumably maps the element to a type ::acos
// accepts on the device - confirm against types.hpp.
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) { return ::acos(to_hip_type(v)); });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/add.cpp
View file @
bc5d7f75
#include <migraph/gpu/device/add.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraph
x
/gpu/device/add.hpp>
#include <migraph
x
/gpu/device/nary.hpp>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
device
{
...
...
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/device/add_relu.cpp
View file @
bc5d7f75
#include <migraph/gpu/device/add_relu.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraph
x
/gpu/device/add_relu.hpp>
#include <migraph
x
/gpu/device/nary.hpp>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
device
{
...
...
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/device/asin.cpp
0 → 100644
View file @
bc5d7f75
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise arc sine: hands a functor applying ::asin to nary(), with
// `result` as the destination and `arg` as the operand.
// NOTE(review): to_hip_type presumably maps the element to a type ::asin
// accepts on the device - confirm against types.hpp.
void asin(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) { return ::asin(to_hip_type(v)); });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/atan.cpp
0 → 100644
View file @
bc5d7f75
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise arc tangent: hands a functor applying ::atan to nary(), with
// `result` as the destination and `arg` as the operand.
// NOTE(review): to_hip_type presumably maps the element to a type ::atan
// accepts on the device - confirm against types.hpp.
void atan(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) { return ::atan(to_hip_type(v)); });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/concat.cpp
View file @
bc5d7f75
#include <migraph/shape.hpp>
#include <migraph/argument.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>
#include <migraph
x
/shape.hpp>
#include <migraph
x
/argument.hpp>
#include <migraph
x
/gpu/device/concat.hpp>
#include <migraph
x
/gpu/device/tensor.hpp>
#include <migraph
x
/gpu/device/launch.hpp>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
device
{
argument
concat
(
hipStream_t
stream
,
const
migraph
::
shape
&
output_shape
,
std
::
vector
<
migraph
::
argument
>
args
,
const
migraph
x
::
shape
&
output_shape
,
std
::
vector
<
migraph
x
::
argument
>
args
,
std
::
vector
<
std
::
size_t
>
offsets
)
{
for
(
std
::
size_t
l
=
0
;
l
<
args
.
size
()
-
1
;
l
++
)
...
...
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/device/contiguous.cpp
View file @
bc5d7f75
#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraph
x
/gpu/device/contiguous.hpp>
#include <migraph
x
/gpu/device/nary.hpp>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
migraph
x
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
device
{
...
...
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
x
src/targets/gpu/device/cos.cpp
0 → 100644
View file @
bc5d7f75
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise cosine: hands a functor applying ::cos to nary(), with
// `result` as the destination and `arg` as the operand.
// NOTE(review): to_hip_type presumably maps the element to a type ::cos
// accepts on the device - confirm against types.hpp.
void cos(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) { return ::cos(to_hip_type(v)); });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/cosh.cpp
0 → 100644
View file @
bc5d7f75
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise hyperbolic cosine: hands a functor applying ::cosh to nary(),
// with `result` as the destination and `arg` as the operand.
// NOTE(review): to_hip_type presumably maps the element to a type ::cosh
// accepts on the device - confirm against types.hpp.
void cosh(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) { return ::cosh(to_hip_type(v)); });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
…
3
4
5
6
7
8
9
10
11
…
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment