gaoqiong / MIGraphX / Commits / a5c1c7f6

Unverified commit a5c1c7f6, authored Feb 10, 2019 by Paul Fultz II; committed by GitHub on Feb 10, 2019.

    Merge branch 'develop' into mem_color_ordering_fix

Parents: 462a4920, d516b099

Changes: 303 files in the full commit; this page shows 20 changed files with 374 additions and 162 deletions (+374, -162).
src/targets/cpu/gemm.cpp                            +2    -2
src/targets/cpu/include/migraphx/cpu/context.hpp    +4    -4
src/targets/cpu/include/migraphx/cpu/gemm.hpp       +4    -4
src/targets/cpu/include/migraphx/cpu/lowering.hpp   +4    -4
src/targets/cpu/include/migraphx/cpu/target.hpp     +4    -4
src/targets/cpu/lowering.cpp                        +181  -46
src/targets/cpu/target.cpp                          +9    -3
src/targets/gpu/CMakeLists.txt                      +22   -4
src/targets/gpu/abs.cpp                             +39   -0
src/targets/gpu/add.cpp                             +0    -55
src/targets/gpu/batchnorm.cpp                       +4    -3
src/targets/gpu/concat.cpp                          +2    -2
src/targets/gpu/contiguous.cpp                      +2    -2
src/targets/gpu/convolution.cpp                     +37   -23
src/targets/gpu/device/acos.cpp                     +18   -0
src/targets/gpu/device/add.cpp                      +2    -2
src/targets/gpu/device/add_relu.cpp                 +2    -2
src/targets/gpu/device/asin.cpp                     +18   -0
src/targets/gpu/device/atan.cpp                     +18   -0
src/targets/gpu/device/concat.cpp                   +2    -2
src/targets/cpu/gemm.cpp

@@ -4,7 +4,7 @@
 #include <blaze/math/CustomMatrix.h>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 template <class T>
@@ -94,5 +94,5 @@ void migemm(
 }
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/cpu/include/migraphx/cpu/context.hpp

-#ifndef MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP

 #include <migraphx/config.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 struct context
@@ -13,7 +13,7 @@ struct context
 };

 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
src/targets/cpu/include/migraphx/cpu/gemm.hpp

-#ifndef MIGRAPH_GUARD_RTGLIB_CPU_GEMM_HPP
-#define MIGRAPH_GUARD_RTGLIB_CPU_GEMM_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP

 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 void migemm(const argument& c_arg,
             const argument& a_arg,
             const argument& b_arg,
             float alpha,
             float beta);

 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
src/targets/cpu/include/migraphx/cpu/lowering.hpp

-#ifndef MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
-#define MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP

 #include <migraphx/program.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 struct lowering
@@ -15,7 +15,7 @@ struct lowering
 };

 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
src/targets/cpu/include/migraphx/cpu/target.hpp

-#ifndef MIGRAPH_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
-#define MIGRAPH_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP

 #include <migraphx/program.hpp>
 #include <migraphx/cpu/context.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 struct target
@@ -17,7 +17,7 @@ struct target
 };

 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
src/targets/cpu/lowering.cpp

@@ -5,12 +5,13 @@
 #include <migraphx/operators.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/par_dfor.hpp>
 #include <migraphx/cpu/gemm.hpp>
 #include <unordered_map>
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 template <typename T>
@@ -19,6 +20,14 @@ T zero(const T&)
     return T(0);
 }

+template <class T>
+typename std::conditional_t<std::is_integral<T>{},
+                            std::make_signed<T>,
+                            std::enable_if<true, T>>::type
+make_signed(T x)
+{
+    return x;
+}
+
 //
 // cpu implemenataion of batch norm for inference
 //
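A note on the make_signed helper added above: std::abs has no overload for unsigned integer types (a call like std::abs(7u) is ambiguous between the int/long/double overloads), so the trait maps integral T to std::make_signed<T> and passes any other T, such as float, through via std::enable_if<true, T>. A self-contained sketch of the same trick; the test values are illustrative only:

#include <cstdlib>
#include <type_traits>

// Integral T -> its signed counterpart; non-integral T (e.g. float) -> T.
template <class T>
typename std::conditional_t<std::is_integral<T>{},
                            std::make_signed<T>,
                            std::enable_if<true, T>>::type
make_signed(T x)
{
    return x;
}

int main()
{
    unsigned int u = 7;              // std::abs(u) would be ambiguous
    auto s         = make_signed(u); // s has type int
    return std::abs(s) == 7 ? 0 : 1;
}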
@@ -64,7 +73,7 @@ struct cpu_batch_norm_inference
             visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
                 [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    dfor(num_batch, num_channels, image_height, image_width)(
+                    par_dfor(num_batch, num_channels, image_height, image_width)(
                         [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
                             assert((variance(c) + epsilon) > 0);
                             result(n, c, h, w) = gamma(c) * (buffer(n, c, h, w) - mean(c)) /
@@ -79,7 +88,7 @@ struct cpu_batch_norm_inference
             visit_all(output, input, mini_batch_mean, mini_batch_mean, arg_gamma, arg_bias)(
                 [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    dfor(num_batch, num_channels, image_height, image_width)(
+                    par_dfor(num_batch, num_channels, image_height, image_width)(
                         [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
                             assert((variance(c, h, w) + epsilon) > 0);
                             result(n, c, h, w) = gamma(c, h, w) *
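The recurring change in this file is dfor(...) becoming par_dfor(...). Both helpers are curried: par_dfor(d0, d1, ...) returns a callable that takes the loop body, so par_dfor(n, c, h, w)([&](...) { ... }) runs the body over the whole index space. The diff does not show their definitions; the sketch below captures the assumed contract of the parallel variant for two dimensions (the name and the threading strategy are illustrative, not MIGraphX's actual implementation):

#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>

// Hypothetical par_dfor-style helper: run f(i, j) for every (i, j) in
// [0, n) x [0, m), striding the outer dimension across hardware threads.
template <class F>
void par_dfor_sketch(std::size_t n, std::size_t m, F f)
{
    std::size_t nthreads = std::max(1u, std::thread::hardware_concurrency());
    std::vector<std::thread> workers;
    for(std::size_t t = 0; t < nthreads; t++)
    {
        workers.emplace_back([=] {
            for(std::size_t i = t; i < n; i += nthreads)
                for(std::size_t j = 0; j < m; j++)
                    f(i, j);
        });
    }
    for(auto& w : workers)
        w.join();
}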
@@ -94,6 +103,43 @@ struct cpu_batch_norm_inference
     }
 };

+struct cpu_lrn
+{
+    op::lrn op;
+    std::string name() const { return "cpu::lrn"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        visit_all(result, args[0])([&](auto output, auto input) {
+            int n_batch         = output_shape.lens()[0];
+            int channels        = output_shape.lens()[1];
+            int height          = output_shape.lens()[2];
+            int width           = output_shape.lens()[3];
+            float alphaoverarea = op.alpha / op.size;
+            int radius          = (op.size - 1) / 2;
+
+            par_dfor(n_batch, height, width)([&](int b, int h, int w) {
+                float scale = 0;
+                dfor(channels)([&](int c) {
+                    auto start = (c - radius) < 0 ? 0 : (c - radius);
+                    auto end   = (c + radius) > channels ? channels : (c + radius);
+                    for(auto k = start; k < end; ++k)
+                    {
+                        scale += std::pow(input(b, k, h, w), 2);
+                    }
+                    scale *= alphaoverarea;
+                    scale += op.bias;
+                    scale = std::pow(scale, -op.beta);
+                    output(b, c, h, w) = input(b, c, h, w) * scale;
+                });
+            });
+        });
+        return result;
+    }
+};
+
 struct cpu_convolution
 {
     op::convolution op;
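cpu_lrn above is a CPU implementation of cross-channel local response normalization. With radius = (size - 1) / 2 and alphaoverarea = alpha / size as in the code, the standard per-element definition the variables correspond to is:

\mathrm{output}(b,c,h,w) \;=\; \mathrm{input}(b,c,h,w)\cdot
\Bigl(\mathrm{bias} \;+\; \tfrac{\alpha}{\mathrm{size}}
\sum_{k=\max(0,\,c-r)}^{\min(C,\,c+r)-1} \mathrm{input}(b,k,h,w)^{2}\Bigr)^{-\beta},
\qquad r = \tfrac{\mathrm{size}-1}{2}

Note that in the code as committed, scale is declared once per (b, h, w) and then updated inside the channel loop, so the bias/power steps apply cumulatively across channels rather than freshly per channel; the formula above gives the usual per-channel reading.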
@@ -104,28 +150,33 @@ struct cpu_convolution
     {
         argument result{output_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
-            auto in_h  = input.get_shape().lens()[2];
-            auto in_w  = input.get_shape().lens()[3];
-            auto wei_c = weights.get_shape().lens()[1];
-            auto wei_h = weights.get_shape().lens()[2];
-            auto wei_w = weights.get_shape().lens()[3];
-            dfor(output_shape.lens()[0],
-                 output_shape.lens()[1],
-                 output_shape.lens()[2],
-                 output_shape.lens()[3])(
+            auto in   = input.get_shape().lens();
+            auto in_h = in[2];
+            auto in_w = in[3];
+
+            auto wei   = weights.get_shape().lens();
+            auto wei_n = wei[0];
+            auto wei_c = wei[1];
+            auto wei_h = wei[2];
+            auto wei_w = wei[3];
+
+            par_dfor(output_shape.lens()[0],
+                     output_shape.lens()[1],
+                     output_shape.lens()[2],
+                     output_shape.lens()[3])(
                 [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
-                    const int start_x = i * op.stride[0] - op.padding[0];
-                    const int start_y = j * op.stride[1] - op.padding[1];
+                    const int start_x  = i * op.stride[0] - op.padding[0];
+                    const int start_y  = j * op.stride[1] - op.padding[1];
+                    const int group_id = w / (wei_n / op.group);

                     double acc = 0;
                     dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
-                        const int in_x = start_x + x;
-                        const int in_y = start_y + y;
+                        const int in_x  = start_x + x;
+                        const int in_y  = start_y + y;
+                        const int in_ch = group_id * wei_c + k;
                         if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                         {
-                            acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
+                            acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
                         }
                     });
                     output(o, w, i, j) = acc;
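The convolution rewrite above adds grouped-convolution indexing: each output channel w belongs to group group_id = w / (wei_n / op.group), and weight channel k now reads input channel in_ch = group_id * wei_c + k rather than k itself. A worked example with hypothetical sizes:

// Hypothetical sizes: wei_n = 4 output channels, op.group = 2 groups,
// wei_c = 3 weight channels per group, hence 6 input channels in total.
//
//   w = 0, 1  ->  group_id = 0  ->  reads input channels 0, 1, 2
//   w = 2, 3  ->  group_id = 1  ->  reads input channels 3, 4, 5
//
// For w = 3, k = 1:  group_id = 3 / (4 / 2) = 1;  in_ch = 1 * 3 + 1 = 4.
// With op.group = 1 this reduces to in_ch = k, the previous behaviour.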
@@ -158,7 +209,8 @@ struct cpu_im2col
         const std::size_t& stride_h = op.stride[0];
         const std::size_t& stride_w = op.stride[1];

-        int kdiv2_h, kdiv2_w;
+        int kdiv2_h;
+        int kdiv2_w;
         kdiv2_h = kernel_h / 2;
         kdiv2_w = kernel_w / 2;
         // calculate output sizes
@@ -231,10 +283,10 @@ struct cpu_pooling
             auto in_h = input.get_shape().lens()[2];
             auto in_w = input.get_shape().lens()[3];

-            dfor(output_shape.lens()[0],
-                 output_shape.lens()[1],
-                 output_shape.lens()[2],
-                 output_shape.lens()[3])(
+            par_dfor(output_shape.lens()[0],
+                     output_shape.lens()[1],
+                     output_shape.lens()[2],
+                     output_shape.lens()[3])(
                 [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                     const int start_x0 = i * op.stride[0] - op.padding[0];
                     const int start_y0 = j * op.stride[1] - op.padding[1];
@@ -271,14 +323,33 @@ struct cpu_contiguous
     std::string name() const { return "cpu::contiguous"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        visit_all(result, args[0])([&](auto output, auto input) {
-            shape_for_each(output.get_shape(), [&](const auto& idx) {
-                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
-            });
-        });
-        return result;
+        return op.compute(output_shape, std::move(args));
+    }
+};
+
+struct cpu_pad
+{
+    op::pad op;
+    std::string name() const { return "cpu::contiguous"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        assert(output_shape.standard());
+        argument result{output_shape};
+        result.visit([&](auto output) { std::fill(output.begin(), output.end(), op.value); });
+
+        visit_all(result, args[0])([&](auto output, auto input) {
+            shape_for_each(input.get_shape(), [&](const auto& idx) {
+                std::vector<std::size_t> new_idx(idx.size());
+                std::transform(
+                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
+                        return i + j;
+                    });
+                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
+            });
+        });
+        return result;
     }
 };
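The new cpu_pad fills the whole output with op.value, then copies every input element to output position idx + pads, offsetting each coordinate by the leading pad of that dimension (the trailing pads are covered by the initial fill). A minimal 1-D sketch of the same rule; the values are hypothetical:

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<int> input = {5, 6};
    std::size_t pads_front = 2, pads_back = 1;
    int value = -1; // stand-in for op.value

    // Fill with the pad value, then shift-copy, as cpu_pad::compute does.
    std::vector<int> output(pads_front + input.size() + pads_back, value);
    for(std::size_t i = 0; i < input.size(); i++)
        output[i + pads_front] = input[i]; // new_idx = idx + pads

    assert((output == std::vector<int>{-1, -1, 5, 6, -1}));
    return 0;
}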
@@ -290,24 +361,7 @@ struct cpu_concat
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        std::vector<std::size_t> coffsets = op.compute_offsets(output_shape, args);
-        for(std::size_t l = 0; l < args.size(); l++)
-        {
-            auto argl             = args[l];
-            std::size_t nelements = argl.get_shape().elements();
-            visit_all(result, argl)([&](auto output, auto input) {
-                auto slice_shape =
-                    shape{output_shape.type(), input.get_shape().lens(), output_shape.strides()};
-                auto slice = make_view(slice_shape, output.data() + coffsets[l]);
-                // cppcheck-suppress useStlAlgorithm
-                for(std::size_t i = 0; i < nelements; i++)
-                {
-                    slice[i] = input[i];
-                }
-            });
-        }
-        return result;
+        return op.compute(output_shape, std::move(args));
     }
 };
@@ -325,6 +379,18 @@ struct cpu_gemm
     }
 };

+struct cpu_gather
+{
+    op::gather op;
+    std::string name() const { return "cpu::gather"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        return op.compute(output_shape, std::move(args));
+    }
+};
+
 struct identity_op
 {
     std::string name() const { return "cpu::identity"; }
@@ -339,7 +405,7 @@ struct abs_op
     std::string name() const { return "cpu::abs"; }
     auto fcn() const
     {
-        return [](auto x) { return std::abs(x); };
+        return [](auto x) { return std::abs(make_signed(x)); };
     }
 };
@@ -352,6 +418,15 @@ struct exp_op
     }
 };

+struct log_op
+{
+    std::string name() const { return "cpu::log"; }
+    auto fcn() const
+    {
+        return [](auto x) { return std::log(x); };
+    }
+};
+
 struct sin_op
 {
     std::string name() const { return "cpu::sin"; }
@@ -406,6 +481,24 @@ struct atan_op
     }
 };

+struct sinh_op
+{
+    std::string name() const { return "cpu::sinh"; }
+    auto fcn() const
+    {
+        return [](auto x) { return std::sinh(x); };
+    }
+};
+
+struct cosh_op
+{
+    std::string name() const { return "cpu::cosh"; }
+    auto fcn() const
+    {
+        return [](auto x) { return std::cosh(x); };
+    }
+};
+
 struct tanh_op
 {
     std::string name() const { return "cpu::tanh"; }
@@ -453,6 +546,17 @@ struct leaky_relu_op
     }
 };

+struct elu_op
+{
+    op::elu op;
+    std::string name() const { return "cpu::elu"; }
+    auto fcn() const
+    {
+        auto& a = op.alpha;
+        return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
+    }
+};
+
 template <typename Op>
 struct cpu_unary
 {
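elu_op captures op.alpha by value and implements the exponential linear unit; std::expm1(x) computes e^x - 1 with full precision near x = 0, where std::exp(x) - 1 would lose significant digits:

\mathrm{ELU}(x) = \begin{cases} x, & x > 0 \\ \alpha\,(e^{x}-1), & x \le 0 \end{cases}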
@@ -545,6 +649,24 @@ struct div_op
     }
 };

+struct max_op
+{
+    std::string name() const { return "max"; }
+    auto fcn() const
+    {
+        return [](auto x, auto y) { return std::max(x, y); };
+    }
+};
+
+struct min_op
+{
+    std::string name() const { return "min"; }
+    auto fcn() const
+    {
+        return [](auto x, auto y) { return std::min(x, y); };
+    }
+};
+
 template <typename Op>
 struct cpu_binary
 {
@@ -596,22 +718,35 @@ struct cpu_apply
         apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
+        apply_map["lrn"]        = extend_op<cpu_lrn, op::lrn>();
         apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
+        apply_map["pad"]        = extend_op<cpu_pad, op::pad>();
         apply_map["concat"]     = extend_op<cpu_concat, op::concat>();
+        apply_map["gather"]     = extend_op<cpu_gather, op::gather>();
         apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
+        apply_map["elu"]        = extend_op<cpu_unary<elu_op>, op::elu>();

         apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
         apply_map["abs"]      = simple_op<cpu_unary<abs_op>>();
+        apply_map["sinh"]     = simple_op<cpu_unary<sinh_op>>();
+        apply_map["cosh"]     = simple_op<cpu_unary<cosh_op>>();
         apply_map["tanh"]     = simple_op<cpu_unary<tanh_op>>();
         apply_map["sigmoid"]  = simple_op<cpu_unary<sigmoid_op>>();
         apply_map["exp"]      = simple_op<cpu_unary<exp_op>>();
+        apply_map["log"]      = simple_op<cpu_unary<log_op>>();
         apply_map["neg"]      = simple_op<cpu_unary<neg_op>>();
         apply_map["sin"]      = simple_op<cpu_unary<sin_op>>();
         apply_map["cos"]      = simple_op<cpu_unary<cos_op>>();
         apply_map["tan"]      = simple_op<cpu_unary<tan_op>>();
         apply_map["asin"]     = simple_op<cpu_unary<asin_op>>();
         apply_map["acos"]     = simple_op<cpu_unary<acos_op>>();
         apply_map["atan"]     = simple_op<cpu_unary<atan_op>>();
         apply_map["relu"]     = simple_op<cpu_unary<relu_op>>();
         apply_map["add"]      = simple_op<cpu_binary<add_op>>();
         apply_map["sub"]      = simple_op<cpu_binary<sub_op>>();
         apply_map["mul"]      = simple_op<cpu_binary<mul_op>>();
         apply_map["div"]      = simple_op<cpu_binary<div_op>>();
+        apply_map["max"]      = simple_op<cpu_binary<max_op>>();
+        apply_map["min"]      = simple_op<cpu_binary<min_op>>();
         apply_map["softmax"]  = simple_op<softmax2d>();
     }
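The registrations above follow two patterns: extend_op<T, Op> pairs a cpu:: wrapper with the framework operator whose attributes it must carry over (stride, padding, alpha, ...), while simple_op<T> is for stateless wrappers. Every elementwise struct in this file exposes the same two-member protocol, name() and fcn(); a self-contained sketch of how such a struct plugs into a generic unary wrapper (sqrt_op_sketch and apply_unary_sketch are illustrative, not part of the commit):

#include <algorithm>
#include <cmath>
#include <string>
#include <vector>

// Hypothetical op following the same shape as sin_op/log_op above.
struct sqrt_op_sketch
{
    std::string name() const { return "cpu::sqrt"; }
    auto fcn() const
    {
        return [](auto x) { return std::sqrt(x); };
    }
};

// Simplified stand-in for cpu_unary: apply Op::fcn() elementwise.
template <class Op>
std::vector<float> apply_unary_sketch(const Op& op, std::vector<float> in)
{
    std::transform(in.begin(), in.end(), in.begin(), op.fcn());
    return in;
}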
@@ -658,5 +793,5 @@ struct cpu_apply
 void lowering::apply(program& p) const { cpu_apply{&p}.apply(); }

 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/cpu/target.cpp

@@ -2,18 +2,24 @@
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/cpu/lowering.hpp>
 #include <migraphx/auto_contiguous.hpp>
+#include <migraphx/rewrite_rnn.hpp>
+#include <migraphx/dead_code_elimination.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

 std::string target::name() const { return "cpu"; }

 std::vector<pass> target::get_passes(migraphx::context&) const
 {
-    return {auto_contiguous{}, lowering{}};
+    return {auto_contiguous{},
+            rewrite_rnn{},
+            dead_code_elimination{},
+            lowering{},
+            dead_code_elimination{}};
 }

 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/CMakeLists.txt

@@ -12,15 +12,29 @@ endif()
 add_library(migraphx_device
     device/add.cpp
+    device/max.cpp
+    device/min.cpp
     device/exp.cpp
+    device/log.cpp
     device/sin.cpp
+    device/cos.cpp
+    device/tan.cpp
+    device/sinh.cpp
+    device/cosh.cpp
+    device/asin.cpp
+    device/acos.cpp
+    device/atan.cpp
     device/add_relu.cpp
     device/contiguous.cpp
     device/mul.cpp
     device/concat.cpp
+    device/pad.cpp
+    device/gather.cpp
+    device/sub.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
 rocm_clang_tidy_check(migraphx_device)
-target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx903)
+target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906)
 target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
 target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)

@@ -38,12 +52,16 @@ add_library(migraphx_gpu
     concat.cpp
     relu.cpp
     leaky_relu.cpp
     add.cpp
     sin.cpp
     mul.cpp
     tanh.cpp
     batchnorm.cpp
     write_literals.cpp
     rocblas.cpp
     sigmoid.cpp
+    abs.cpp
     elu.cpp
+    pad.cpp
+    gather.cpp
+    lrn.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)
src/targets/gpu/abs.cpp (new file, 0 → 100644)

#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).not_broadcasted();
    return inputs.at(1);
}

argument miopen_abs::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
                            ad.get(),
                            &alpha,
                            x_desc.get(),
                            args[0].implicit(),
                            &beta,
                            y_desc.get(),
                            args[1].implicit());
    return args[1];
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
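In the new miopen_abs, ad is the operator's activation descriptor (it comes from the gpu/abs.hpp header, which this page does not show); abs is expressed as a MIOpen activation so the existing miopenActivationForward path can run it. A hedged sketch of how such a descriptor is typically created with MIOpen's public API; the helper name is illustrative, and alpha/beta/gamma are assumed unused in ABS mode:

#include <miopen/miopen.h>

// Assumption: the ad member used above is an ABS-mode activation descriptor.
inline miopenActivationDescriptor_t make_abs_descriptor_sketch()
{
    miopenActivationDescriptor_t ad = nullptr;
    miopenCreateActivationDescriptor(&ad);
    miopenSetActivationDescriptor(ad, miopenActivationABS, 0.0, 0.0, 0.0);
    return ad;
}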
src/targets/gpu/add.cpp (deleted, 100644 → 0; shown as of parent 462a4920)

#include <migraphx/gpu/add.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
namespace gpu {

shape hip_add::compute_shape(const std::vector<shape>& inputs) const
{
    // check_shapes{inputs, *this}.has(3).standard();
    check_shapes{inputs, *this}.has(3);
    return inputs.at(0);
}

argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::add(ctx.get_stream().get(), args[2], args[0], args[1]);
    return args[2];
}

shape miopen_add::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(3).not_broadcasted();
    return inputs.at(0);
}

argument miopen_add::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    float alpha = 1, beta = 0;
    auto a_desc = make_tensor(args[0].get_shape());
    auto b_desc = make_tensor(args[1].get_shape());
    auto c_desc = make_tensor(output_shape);
    miopenOpTensor(ctx.get_stream().get_miopen(),
                   miopenTensorOpAdd,
                   &alpha,
                   a_desc.get(),
                   args[0].implicit(),
                   &alpha,
                   b_desc.get(),
                   args[1].implicit(),
                   &beta,
                   c_desc.get(),
                   args[2].implicit());
    return args[2];
}

} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraphx
src/targets/gpu/batchnorm.cpp

@@ -5,7 +5,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const

@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
     auto y_desc  = make_tensor(output_shape);
     auto bn_desc = make_tensor(args[3].get_shape());
-    float alpha = 1.0, beta = 0.0f;
+    float alpha = 1.0;
+    float beta  = 0.0f;

     miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
                                              miopenBatchNormMode_t(op.bn_mode),

@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/concat.cpp

@@ -6,7 +6,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 shape hip_concat::compute_shape(std::vector<shape> inputs) const

@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/contiguous.cpp

@@ -5,7 +5,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 shape miopen_contiguous::compute_shape(const std::vector<shape>& inputs) const

@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/convolution.cpp

@@ -5,7 +5,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const

@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
     auto w_desc = make_tensor(args[1].get_shape());
     auto y_desc = make_tensor(output_shape);

-    float alpha = 1, beta = 0;
+    float alpha = 1;
+    float beta  = 0;
     miopenConvolutionForward(ctx.get_stream().get_miopen(),
                              &alpha,
                              x_desc.get(),

@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
 shape miopen_convolution::compile(context& ctx,
                                   const shape& output_shape,
-                                  std::vector<instruction_ref> inputs)
+                                  std::vector<shape> inputs)
 {
     shape workspace_shape{};
-    auto x_desc = make_tensor(inputs[0]->get_shape());
-    auto w_desc = make_tensor(inputs[1]->get_shape());
+    auto x_desc = make_tensor(inputs[0]);
+    auto w_desc = make_tensor(inputs[1]);
     auto y_desc = make_tensor(output_shape);

     std::size_t workspace_size = 0;

@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
                                              &workspace_size);
     workspace_shape = shape{shape::int8_type, {workspace_size}};

-    auto x         = to_gpu(generate_argument(inputs[0]->get_shape()));
-    auto w         = to_gpu(generate_argument(inputs[1]->get_shape()));
+    auto x         = to_gpu(generate_argument(inputs[0]));
+    auto w         = to_gpu(generate_argument(inputs[1]));
     auto y         = allocate_gpu(output_shape);
     auto workspace = allocate_gpu(workspace_shape);

     int algo_count = 1;
     miopenConvAlgoPerf_t perf;
-    miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
-                                          x_desc.get(),
-                                          x.implicit(),
-                                          w_desc.get(),
-                                          w.implicit(),
-                                          cd.get(),
-                                          y_desc.get(),
-                                          y.implicit(),
-                                          1,
-                                          &algo_count,
-                                          &perf,
-                                          workspace.implicit(),
-                                          workspace_size,
-                                          false);
-    algo = perf.fwd_algo;
+    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
+                                                        x_desc.get(),
+                                                        x.implicit(),
+                                                        w_desc.get(),
+                                                        w.implicit(),
+                                                        cd.get(),
+                                                        y_desc.get(),
+                                                        y.implicit(),
+                                                        1,
+                                                        &algo_count,
+                                                        &perf,
+                                                        workspace.implicit(),
+                                                        workspace_size,
+                                                        false);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("Find convolution failed");
+    handle = ctx.get_stream().get_miopen();
+    algo   = perf.fwd_algo;
     return shape{shape::int8_type, {perf.memory}};
 }

+void miopen_convolution::finalize(context& ctx,
+                                  const shape& output_shape,
+                                  std::vector<shape> inputs)
+{
+    if(handle == ctx.get_stream().get_miopen())
+        return;
+    // TODO: Check that workspace hasn't changed
+    compile(ctx, output_shape, std::move(inputs));
+}
+
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
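Two things change in compile() above: its inputs become plain shapes instead of instruction_refs (it only ever needed the shapes), and the result of miopenFindConvolutionForwardAlgorithm is now checked, with the MIOpen handle that was tuned against cached in the handle member. The new finalize() re-runs the search only when the context's handle differs from the cached one, so re-finalizing on the same stream skips the expensive find step. A minimal sketch of that caching idiom with simplified, hypothetical types:

// Hypothetical stand-in for miopenHandle_t.
using handle_t = const void*;

struct tuned_conv_sketch
{
    handle_t handle = nullptr; // handle the algorithm was last tuned on

    void compile(handle_t h)
    {
        // ... expensive algorithm search would run here ...
        handle = h; // remember which handle the result is valid for
    }

    void finalize(handle_t h)
    {
        if(handle == h)
            return; // already tuned for this handle
        compile(h); // context changed: search again
    }
};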
src/targets/gpu/device/acos.cpp (new file, 0 → 100644)

#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void acos(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
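The three new device files (acos, asin, atan) are identical apart from the math function: nary(stream, result, args...) is MIGraphX's elementwise launcher, the lambda becomes the per-element GPU kernel body, ::acos resolves to the HIP device math function, and to_hip_type presumably converts MIGraphX's element type into one the intrinsic accepts. Under that reading, adding another unary device op is mechanical; a hypothetical cbrt, not part of this commit, would look like:

#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Hypothetical: cube root, following the acos/asin/atan pattern exactly.
void cbrt(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::cbrt(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx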
src/targets/gpu/device/add.cpp

@@ -2,7 +2,7 @@
 #include <migraphx/gpu/device/nary.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

@@ -22,5 +22,5 @@ void add(hipStream_t stream,
 }

 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/device/add_relu.cpp

@@ -2,7 +2,7 @@
 #include <migraphx/gpu/device/nary.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
 }

 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/device/asin.cpp (new file, 0 → 100644)

#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void asin(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::asin(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/atan.cpp (new file, 0 → 100644)

#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void atan(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::atan(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/concat.cpp

@@ -5,7 +5,7 @@
 #include <migraphx/gpu/device/launch.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
 }

 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx