gaoqiong / MIGraphX — Commit 3a848f0d

Authored Mar 19, 2020 by Paul
Merge branch 'develop' into doc2
Parents: 64e8e30a, d1e945da

Changes: 208 files in total; this page shows 20 changed files with 88 additions and 36 deletions (+88 -36).
Changed files shown on this page (additions / deletions):

  src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp        +3   -2
  src/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp   +1   -1
  src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp          +10  -11
  src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp        +9   -0
  src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp         +6   -1
  src/targets/gpu/device/int8_gemm_pack.cpp                            +2   -2
  src/targets/gpu/device/log.cpp                                       +1   -1
  src/targets/gpu/device/logsoftmax.cpp                                +4   -5
  src/targets/gpu/device/max.cpp                                       +1   -1
  src/targets/gpu/device/min.cpp                                       +1   -1
  src/targets/gpu/device/mul.cpp                                       +3   -2
  src/targets/gpu/device/mul_add.cpp                                   +2   -1
  src/targets/gpu/device/mul_add_relu.cpp                              +1   -1
  src/targets/gpu/device/pad.cpp                                       +3   -3
  src/targets/gpu/device/pow.cpp                                       +1   -1
  src/targets/gpu/device/prelu.cpp (new file)                          +18  -0
  src/targets/gpu/device/reduce_prod.cpp (new file)                    +18  -0
  src/targets/gpu/device/relu.cpp                                      +1   -1
  src/targets/gpu/device/round.cpp                                     +1   -1
  src/targets/gpu/device/sigmoid.cpp                                   +2   -1
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp

@@ -78,8 +78,9 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
     index_int nglobal = std::min<index_int>(256, groups) * local;
     return [=](auto f) {
-        launch(stream, nglobal, local)([=](auto idx) {
-            idx.global_stride(n, [&](auto i) { gs_invoke(f, i, idx); });
-        });
+        launch(stream, nglobal, local)([=](auto idx) __device__ {
+            idx.global_stride(n, [&](auto i) { gs_invoke(f, i, idx); });
+        });
     };
 }
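A minimal usage sketch of the gs_launch helper changed above, assuming this header is included; the kernel body must now be an explicitly __device__ lambda, and gs_invoke forwards each flat element index to it. The function and parameter names here (scale_buffer, data, factor) are made up for illustration.

    // Hypothetical caller: scale n floats on the GPU through gs_launch.
    void scale_buffer(hipStream_t stream, float* data, index_int n, float factor)
    {
        gs_launch(stream, n)([=](auto i) __device__ { data[i] *= factor; });
    }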
src/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp

@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int
     auto nglobal = global.index(nglobal_multi);
     return [=](auto f) {
-        launch(stream, nglobal, nlocal)([=](auto idx) {
+        launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
             auto midx = make_multi_index(global, idx.global, nglobal_multi);
             f(idx, midx.for_stride(global.lens));
         });
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp

@@ -36,7 +36,8 @@ auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, A
     MIGRAPHX_TRACE_NARY_FUNCTION
     shape s{result.get_shape().type(), result.get_shape().lens()};
     hip_visit_all(s, result, args...)([&](auto standard_shape, auto output, auto... inputs) {
-        mi_gs_launch(stream, standard_shape)([=](auto idx) { output[idx] = f(inputs[idx]...); });
+        mi_gs_launch(stream, standard_shape)(
+            [=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
     });
 }

@@ -45,7 +46,7 @@ inline auto create_broadcast_index(index_int len, index_int stride)
     auto next_stride   = stride * len;
     auto e_next_stride = encode_divisor(next_stride);
     auto e_stride      = encode_divisor(stride);
-    return [=](auto i) {
+    return [=](auto i) __device__ {
         // ( i % next_stride) / stride
         return fast_div(i, e_stride) - len * fast_div(i, e_next_stride);
     };

@@ -61,11 +62,11 @@ auto nary_nonstandard_packed_impl(hipStream_t stream,
     auto arg_shape = make_array(args...).front().get_shape();
     auto perm      = find_permutation(arg_shape);
     auto s         = reorder_shape(arg_shape, perm);
-    hip_visit_all(s, result.reshape(reorder_shape(result.get_shape(), perm)), args.reshape(s)...)(
-        [&](auto standard_shape, auto output, auto... inputs) {
-            mi_gs_launch(stream, standard_shape)(
-                [=](auto idx) { output[idx] = f(inputs[idx]...); });
-        });
+    hip_visit_all(s, result.reshape(reorder_shape(result.get_shape(), perm)), args.reshape(s)...)(
+        [&](auto standard_shape, auto output, auto... inputs) {
+            mi_gs_launch(stream, standard_shape)(
+                [=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
+        });
 }
 template <class F, class... Arguments>

@@ -93,7 +94,6 @@ void nary_broadcast_vec_impl(
     using type                = typename decltype(output)::value_type;
     const index_int nelements = output.size() / vec_size;
     launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
         MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
         // Load bias into LDS
         for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)

@@ -185,7 +185,6 @@ void nary_double_broadcast_vec_impl(
     using type                = typename decltype(output)::value_type;
     const index_int nelements = output.size() / vec_size;
     launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
         MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
         // Load bias into LDS
         for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)

@@ -274,7 +273,7 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
     const index_int vec_size = 4;
     auto data  = pack_vec<4>(device_cast(inputs.data())...);
     auto* outp = as_vec<4>(device_cast(output.data()));
-    gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) {
+    gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) __device__ {
         vec<type, 4> out = outp[i];
         data([&](auto... xs) {

@@ -295,7 +294,7 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
     MIGRAPHX_TRACE_NARY_FUNCTION
     index_int nelements = result.get_shape().elements();
     hip_pointer_visit_all(result, args...)([&](auto output, auto... inputs) {
-        gs_launch(stream, nelements)([=](auto i) { output[i] = f(inputs[i]...); });
+        gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = f(inputs[i]...); });
     });
 }
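The comment in the create_broadcast_index hunk above rewrites "( i % next_stride) / stride" as two divisions so it can run through fast_div. A self-contained check of that identity with plain integer math (the values are made up, and fast_div/encode_divisor themselves are not exercised here):

    #include <cassert>

    int main()
    {
        // With len = 3 and stride = 4, next_stride = 12. For example, i = 17:
        // (17 % 12) / 4 = 1   and   17/4 - 3 * (17/12) = 4 - 3 = 1.
        const int len = 3, stride = 4, next_stride = stride * len;
        for(int i = 0; i < 1000; ++i)
            assert((i % next_stride) / stride == i / stride - len * (i / next_stride));
        return 0;
    }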
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp

@@ -20,6 +20,15 @@ struct sum
     }
 };
+struct product
+{
+    template <class T, class U>
+    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
+    {
+        return x * y;
+    }
+};
+
 struct id
 {
     template <class T>
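The new functor is plain multiplication, with 1 as its identity — which is how reduce_prod.cpp (added later in this commit) passes it to reduce(). A host-side sketch of the same composition using the standard library; product_ref is a hypothetical stand-in for the device functor:

    #include <iostream>
    #include <numeric>
    #include <vector>

    // Hypothetical host-side mirror of device::product (no MIGRAPHX_DEVICE_CONSTEXPR here).
    struct product_ref
    {
        template <class T, class U>
        constexpr auto operator()(T x, U y) const
        {
            return x * y;
        }
    };

    int main()
    {
        std::vector<int> xs{2, 3, 4};
        // 1 is the identity element, matching the "1" passed next to product{} in reduce_prod.cpp.
        std::cout << std::accumulate(xs.begin(), xs.end(), 1, product_ref{}) << "\n"; // prints 24
        return 0;
    }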
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp

@@ -39,7 +39,12 @@ constexpr void visit_tensor_size(index_int n, F f)
         f(std::integral_constant<index_int, 5>{});
         break;
     }
-    default: throw std::runtime_error("Unknown tensor size");
+    case 6:
+    {
+        f(std::integral_constant<index_int, 6>{});
+        break;
+    }
+    default: throw std::runtime_error("Tensor size dim out of range");
     }
 }
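visit_tensor_size maps a runtime rank onto a compile-time std::integral_constant so the callback can instantiate rank-templated types; the int8_gemm_pack.cpp hunks below use it exactly that way for hip_tensor_descriptor<out_dim>, and this change extends the dispatch to rank 6. A self-contained analogue of the pattern, with hypothetical names (visit_rank is not part of MIGraphX):

    #include <array>
    #include <cstdio>
    #include <stdexcept>
    #include <type_traits>

    // Hypothetical analogue of visit_tensor_size: turn a runtime rank (now up to 6)
    // into a std::integral_constant usable as a template argument.
    template <class F>
    void visit_rank(int n, F f)
    {
        switch(n)
        {
        case 1: f(std::integral_constant<int, 1>{}); break;
        case 2: f(std::integral_constant<int, 2>{}); break;
        case 6: f(std::integral_constant<int, 6>{}); break;
        default: throw std::runtime_error("Tensor size dim out of range");
        }
    }

    int main()
    {
        visit_rank(6, [](auto rank) {
            // rank is a compile-time constant here, so it can size an array.
            std::array<int, decltype(rank)::value> lens{};
            std::printf("dispatched rank = %zu\n", lens.size());
        });
        return 0;
    }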
src/targets/gpu/device/int8_gemm_pack.cpp

@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument
     auto* in_ptr = device_cast(input.data());
     visit_tensor_size(out_lens.size(), [&](auto out_dim) {
         hip_tensor_descriptor<out_dim> desc(comp_shape);
-        gs_launch(stream, nelements, 256)([=](auto ii) {
+        gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
             const size_t nb = 4;
             auto idx        = desc.multi(ii);
             std::size_t i_m = idx[dim_1];

@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
     auto* in_ptr = device_cast(input.data());
     visit_tensor_size(out_lens.size(), [&](auto out_dim) {
         hip_tensor_descriptor<out_dim> desc(comp_shape);
-        gs_launch(stream, nelements, 256)([=](auto ii) {
+        gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
             const size_t nb = 4;
             auto idx        = desc.multi(ii);
             std::size_t i_n = idx[dim_1];
src/targets/gpu/device/log.cpp

@@ -9,7 +9,7 @@ namespace device {
 void log(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::log(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::log(to_hip_type(x)); });
 }
 } // namespace device
src/targets/gpu/device/logsoftmax.cpp

@@ -11,11 +11,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
+void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
 {
-    auto lens                = result.get_shape().lens();
-    auto batch_lens          = lens;
-    index_int batch_item_num = lens[axis];
+    auto batch_lens          = result.get_shape().lens();
+    index_int batch_item_num = batch_lens[axis];
     batch_lens[axis] = 1;
     migraphx::shape batch_shape{result.get_shape().type(), batch_lens};

@@ -44,7 +43,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
         auto log_batch_sum = ::log(to_hip_type(batch_sum)) + batch_max;
-        idx.local_stride(batch_item_num, [&](auto j) {
+        idx.local_stride(batch_item_num, [&](auto j) __device__ {
             data_idx[axis] = j;
             output[data_idx] = input[data_idx] - log_batch_sum;
         });
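A worked example of the shape bookkeeping in the first logsoftmax hunk above, with a made-up shape: if result has lens {2, 3, 4} and axis = 1, then batch_item_num = 3 and batch_lens becomes {2, 1, 4}, so batch_shape has one slot per (batch, position) pair and the local_stride loop in the second hunk walks the 3 elements along the softmax axis.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Made-up example: result lens {2, 3, 4}, axis = 1.
        std::vector<std::size_t> batch_lens{2, 3, 4};
        const std::int64_t axis = 1;
        const std::size_t batch_item_num = batch_lens[axis]; // 3 elements reduced per slot
        batch_lens[axis] = 1;                                // batch_shape lens become {2, 1, 4}
        std::printf("batch_item_num = %zu, batch_lens = {%zu, %zu, %zu}\n",
                    batch_item_num, batch_lens[0], batch_lens[1], batch_lens[2]);
        return 0;
    }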
src/targets/gpu/device/max.cpp

@@ -10,7 +10,7 @@ namespace device {
 void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
     nary(stream, result, arg1, arg2)(
-        [](auto x, auto y) { return std::max(to_hip_type(x), to_hip_type(y)); });
+        [](auto x, auto y) __device__ { return ::max(to_hip_type(x), to_hip_type(y)); });
 }
 } // namespace device
src/targets/gpu/device/min.cpp

@@ -10,7 +10,7 @@ namespace device {
 void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
     nary(stream, result, arg1, arg2)(
-        [](auto x, auto y) { return std::min(to_hip_type(x), to_hip_type(y)); });
+        [](auto x, auto y) __device__ { return ::min(to_hip_type(x), to_hip_type(y)); });
 }
 } // namespace device
src/targets/gpu/device/mul.cpp

@@ -8,7 +8,7 @@ namespace device {
 void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
-    nary(stream, result, arg1, arg2)([](auto x, auto y) { return x * y; });
+    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x * y; });
 }

 void mul(hipStream_t stream,

@@ -17,7 +17,8 @@ void mul(hipStream_t stream,
          const argument& arg2,
          const argument& arg3)
 {
-    nary(stream, result, arg1, arg2, arg3)([](auto x, auto y, auto z) { return x * y * z; });
+    nary(stream, result, arg1, arg2, arg3)(
+        [](auto x, auto y, auto z) __device__ { return x * y * z; });
 }
 } // namespace device
src/targets/gpu/device/mul_add.cpp

@@ -12,7 +12,8 @@ void mul_add(hipStream_t stream,
             const argument& arg2,
             const argument& arg3)
 {
-    nary(stream, result, arg1, arg2, arg3)([](auto x, auto a, auto b) { return a * x + b; });
+    nary(stream, result, arg1, arg2, arg3)(
+        [](auto x, auto a, auto b) __device__ { return a * x + b; });
 }
 } // namespace device
src/targets/gpu/device/mul_add_relu.cpp

@@ -13,7 +13,7 @@ void mul_add_relu(hipStream_t stream,
                  const argument& arg3)
 {
     nary(stream, result, arg1, arg2, arg3)(
-        [](auto x, auto a, auto b) { return std::max<decltype(a * x + b)>(0, a * x + b); });
+        [](auto x, auto a, auto b) __device__ { return ::max<decltype(a * x + b)>(0, a * x + b); });
 }
 } // namespace device
src/targets/gpu/device/pad.cpp

@@ -23,12 +23,12 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
     {
         device_val = device_cast(std::numeric_limits<type>::lowest());
     }
-    gs_launch(stream, result.get_shape().elements())(
-        [=](auto i) { output.data()[i] = device_val; });
+    gs_launch(stream, result.get_shape().elements())(
+        [=](auto i) __device__ { output.data()[i] = device_val; });
     hip_index offsets;
     std::copy(pads.begin(), pads.begin() + offsets.size(), offsets.begin());
-    gs_launch(stream, nelements)([=](auto i) {
+    gs_launch(stream, nelements)([=](auto i) __device__ {
         auto idx = input.get_shape().multi(i);
         for(std::size_t j = 0; j < offsets.size(); j++)
         {
src/targets/gpu/device/pow.cpp

@@ -9,7 +9,7 @@ namespace device {
 void pow(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
     nary(stream, result, arg1, arg2)(
-        [](auto b, auto e) { return ::pow(to_hip_type(b), to_hip_type(e)); });
+        [](auto b, auto e) __device__ { return ::pow(to_hip_type(b), to_hip_type(e)); });
 }
 } // namespace device
src/targets/gpu/device/prelu.cpp  (new file, mode 100644)

+#include <migraphx/gpu/device/prelu.hpp>
+#include <migraphx/gpu/device/nary.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void prelu(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
+{
+    nary(stream, result, arg1, arg2)(
+        [](auto x, auto slope) __device__ { return ((x < 0) ? (x * slope) : x); });
+}
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
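A quick numeric illustration of the prelu lambda above (values are made up): negative inputs are scaled by the slope, non-negative inputs pass through unchanged. prelu_ref is a hypothetical host-side mirror of the device lambda.

    #include <cstdio>

    template <class T>
    T prelu_ref(T x, T slope)
    {
        return (x < 0) ? (x * slope) : x;
    }

    int main()
    {
        std::printf("%.1f %.1f %.1f\n",
                    prelu_ref(-2.0, 0.1),  // -0.2
                    prelu_ref(0.0, 0.1),   //  0.0
                    prelu_ref(3.0, 0.1));  //  3.0
        return 0;
    }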
src/targets/gpu/device/reduce_prod.cpp  (new file, mode 100644)

+#include <migraphx/gpu/device/reduce_prod.hpp>
+#include <migraphx/gpu/device/reduce.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void reduce_prod(hipStream_t stream, const argument& result, const argument& arg)
+{
+    reduce(stream, result, arg, product{}, 1, id{}, id{});
+}
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
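What the new op computes, assuming (as with the other reduce_* device ops) that the axes where result has length 1 are the reduced ones: each output element is the product of the input elements along those axes, seeded with the identity 1 passed above. A made-up 2x3 example, reduced over the last axis:

    #include <cstdio>

    int main()
    {
        // {2, 3, 4} -> 24    {5, 6, 7} -> 210
        const int input[2][3] = {{2, 3, 4}, {5, 6, 7}};
        for(int row = 0; row < 2; ++row)
        {
            int acc = 1; // identity element, as passed to reduce() above
            for(int col = 0; col < 3; ++col)
                acc *= input[row][col];
            std::printf("row %d product = %d\n", row, acc);
        }
        return 0;
    }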
src/targets/gpu/device/relu.cpp

@@ -8,7 +8,7 @@ namespace device {
 void relu(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return std::max<decltype(x)>(0, x); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::max<decltype(x)>(0, x); });
 }
 } // namespace device
src/targets/gpu/device/round.cpp

@@ -9,7 +9,7 @@ namespace device {
 void round(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::round(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::round(to_hip_type(x)); });
 }
 } // namespace device
src/targets/gpu/device/sigmoid.cpp

@@ -9,7 +9,8 @@ namespace device {
 void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return 1.f / (1.f + ::exp(to_hip_type(-x))); });
+    nary(stream, result, arg)(
+        [](auto x) __device__ { return 1.f / (1.f + ::exp(to_hip_type(-x))); });
 }
 } // namespace device