Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
3a848f0d
Commit
3a848f0d
authored
Mar 19, 2020
by
Paul
Browse files
Merge branch 'develop' into doc2
parents
64e8e30a
d1e945da
Changes
208
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
88 additions
and
36 deletions
+88
-36
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
...targets/gpu/device/include/migraphx/gpu/device/launch.hpp
+3
-2
src/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp
...ts/gpu/device/include/migraphx/gpu/device/multi_index.hpp
+1
-1
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+10
-11
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+9
-0
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
+6
-1
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+2
-2
src/targets/gpu/device/log.cpp
src/targets/gpu/device/log.cpp
+1
-1
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+4
-5
src/targets/gpu/device/max.cpp
src/targets/gpu/device/max.cpp
+1
-1
src/targets/gpu/device/min.cpp
src/targets/gpu/device/min.cpp
+1
-1
src/targets/gpu/device/mul.cpp
src/targets/gpu/device/mul.cpp
+3
-2
src/targets/gpu/device/mul_add.cpp
src/targets/gpu/device/mul_add.cpp
+2
-1
src/targets/gpu/device/mul_add_relu.cpp
src/targets/gpu/device/mul_add_relu.cpp
+1
-1
src/targets/gpu/device/pad.cpp
src/targets/gpu/device/pad.cpp
+3
-3
src/targets/gpu/device/pow.cpp
src/targets/gpu/device/pow.cpp
+1
-1
src/targets/gpu/device/prelu.cpp
src/targets/gpu/device/prelu.cpp
+18
-0
src/targets/gpu/device/reduce_prod.cpp
src/targets/gpu/device/reduce_prod.cpp
+18
-0
src/targets/gpu/device/relu.cpp
src/targets/gpu/device/relu.cpp
+1
-1
src/targets/gpu/device/round.cpp
src/targets/gpu/device/round.cpp
+1
-1
src/targets/gpu/device/sigmoid.cpp
src/targets/gpu/device/sigmoid.cpp
+2
-1
No files found.
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
View file @
3a848f0d
...
@@ -78,8 +78,9 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
...
@@ -78,8 +78,9 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
index_int
nglobal
=
std
::
min
<
index_int
>
(
256
,
groups
)
*
local
;
index_int
nglobal
=
std
::
min
<
index_int
>
(
256
,
groups
)
*
local
;
return
[
=
](
auto
f
)
{
return
[
=
](
auto
f
)
{
launch
(
stream
,
nglobal
,
local
)(
launch
(
stream
,
nglobal
,
local
)([
=
](
auto
idx
)
__device__
{
[
=
](
auto
idx
)
{
idx
.
global_stride
(
n
,
[
&
](
auto
i
)
{
gs_invoke
(
f
,
i
,
idx
);
});
});
idx
.
global_stride
(
n
,
[
&
](
auto
i
)
{
gs_invoke
(
f
,
i
,
idx
);
});
});
};
};
}
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp
View file @
3a848f0d
...
@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int
...
@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int
auto
nglobal
=
global
.
index
(
nglobal_multi
);
auto
nglobal
=
global
.
index
(
nglobal_multi
);
return
[
=
](
auto
f
)
{
return
[
=
](
auto
f
)
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
auto
midx
=
make_multi_index
(
global
,
idx
.
global
,
nglobal_multi
);
auto
midx
=
make_multi_index
(
global
,
idx
.
global
,
nglobal_multi
);
f
(
idx
,
midx
.
for_stride
(
global
.
lens
));
f
(
idx
,
midx
.
for_stride
(
global
.
lens
));
});
});
...
...
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
View file @
3a848f0d
...
@@ -36,7 +36,8 @@ auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, A
...
@@ -36,7 +36,8 @@ auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, A
MIGRAPHX_TRACE_NARY_FUNCTION
MIGRAPHX_TRACE_NARY_FUNCTION
shape
s
{
result
.
get_shape
().
type
(),
result
.
get_shape
().
lens
()};
shape
s
{
result
.
get_shape
().
type
(),
result
.
get_shape
().
lens
()};
hip_visit_all
(
s
,
result
,
args
...)([
&
](
auto
standard_shape
,
auto
output
,
auto
...
inputs
)
{
hip_visit_all
(
s
,
result
,
args
...)([
&
](
auto
standard_shape
,
auto
output
,
auto
...
inputs
)
{
mi_gs_launch
(
stream
,
standard_shape
)([
=
](
auto
idx
)
{
output
[
idx
]
=
f
(
inputs
[
idx
]...);
});
mi_gs_launch
(
stream
,
standard_shape
)([
=
](
auto
idx
)
__device__
{
output
[
idx
]
=
f
(
inputs
[
idx
]...);
});
});
});
}
}
...
@@ -45,7 +46,7 @@ inline auto create_broadcast_index(index_int len, index_int stride)
...
@@ -45,7 +46,7 @@ inline auto create_broadcast_index(index_int len, index_int stride)
auto
next_stride
=
stride
*
len
;
auto
next_stride
=
stride
*
len
;
auto
e_next_stride
=
encode_divisor
(
next_stride
);
auto
e_next_stride
=
encode_divisor
(
next_stride
);
auto
e_stride
=
encode_divisor
(
stride
);
auto
e_stride
=
encode_divisor
(
stride
);
return
[
=
](
auto
i
)
{
return
[
=
](
auto
i
)
__device__
{
// ( i % next_stride) / stride
// ( i % next_stride) / stride
return
fast_div
(
i
,
e_stride
)
-
len
*
fast_div
(
i
,
e_next_stride
);
return
fast_div
(
i
,
e_stride
)
-
len
*
fast_div
(
i
,
e_next_stride
);
};
};
...
@@ -61,11 +62,11 @@ auto nary_nonstandard_packed_impl(hipStream_t stream,
...
@@ -61,11 +62,11 @@ auto nary_nonstandard_packed_impl(hipStream_t stream,
auto
arg_shape
=
make_array
(
args
...).
front
().
get_shape
();
auto
arg_shape
=
make_array
(
args
...).
front
().
get_shape
();
auto
perm
=
find_permutation
(
arg_shape
);
auto
perm
=
find_permutation
(
arg_shape
);
auto
s
=
reorder_shape
(
arg_shape
,
perm
);
auto
s
=
reorder_shape
(
arg_shape
,
perm
);
hip_visit_all
(
s
,
hip_visit_all
(
s
,
result
.
reshape
(
reorder_shape
(
result
.
get_shape
(),
perm
)),
args
.
reshape
(
s
)...)(
result
.
reshape
(
reorder_shape
(
result
.
get_shape
(),
perm
)),
[
&
](
auto
standard_shape
,
auto
output
,
auto
...
inputs
)
{
args
.
reshape
(
s
)...)([
&
](
auto
standard_shape
,
auto
output
,
auto
...
inputs
)
{
mi_gs_launch
(
stream
,
standard_shape
)(
mi_gs_launch
(
stream
,
standard_shape
)(
[
=
](
auto
idx
)
{
output
[
idx
]
=
f
(
inputs
[
idx
]...);
});
[
=
](
auto
idx
)
__device__
{
output
[
idx
]
=
f
(
inputs
[
idx
]...);
});
});
});
}
}
template
<
class
F
,
class
...
Arguments
>
template
<
class
F
,
class
...
Arguments
>
...
@@ -93,7 +94,6 @@ void nary_broadcast_vec_impl(
...
@@ -93,7 +94,6 @@ void nary_broadcast_vec_impl(
using
type
=
typename
decltype
(
output
)
::
value_type
;
using
type
=
typename
decltype
(
output
)
::
value_type
;
const
index_int
nelements
=
output
.
size
()
/
vec_size
;
const
index_int
nelements
=
output
.
size
()
/
vec_size
;
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPHX_DEVICE_SHARED
type
buffer
[
2048
/
vec_size
];
MIGRAPHX_DEVICE_SHARED
type
buffer
[
2048
/
vec_size
];
// Load bias into LDS
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
...
@@ -185,7 +185,6 @@ void nary_double_broadcast_vec_impl(
...
@@ -185,7 +185,6 @@ void nary_double_broadcast_vec_impl(
using
type
=
typename
decltype
(
output
)
::
value_type
;
using
type
=
typename
decltype
(
output
)
::
value_type
;
const
index_int
nelements
=
output
.
size
()
/
vec_size
;
const
index_int
nelements
=
output
.
size
()
/
vec_size
;
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPHX_DEVICE_SHARED
type
buffer
[
2048
/
vec_size
];
MIGRAPHX_DEVICE_SHARED
type
buffer
[
2048
/
vec_size
];
// Load bias into LDS
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
...
@@ -274,7 +273,7 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
...
@@ -274,7 +273,7 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
const
index_int
vec_size
=
4
;
const
index_int
vec_size
=
4
;
auto
data
=
pack_vec
<
4
>
(
device_cast
(
inputs
.
data
())...);
auto
data
=
pack_vec
<
4
>
(
device_cast
(
inputs
.
data
())...);
auto
*
outp
=
as_vec
<
4
>
(
device_cast
(
output
.
data
()));
auto
*
outp
=
as_vec
<
4
>
(
device_cast
(
output
.
data
()));
gs_launch
(
stream
,
output_shape
.
elements
()
/
vec_size
)([
=
](
auto
i
)
{
gs_launch
(
stream
,
output_shape
.
elements
()
/
vec_size
)([
=
](
auto
i
)
__device__
{
vec
<
type
,
4
>
out
=
outp
[
i
];
vec
<
type
,
4
>
out
=
outp
[
i
];
data
(
data
(
[
&
](
auto
...
xs
)
{
[
&
](
auto
...
xs
)
{
...
@@ -295,7 +294,7 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
...
@@ -295,7 +294,7 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
MIGRAPHX_TRACE_NARY_FUNCTION
MIGRAPHX_TRACE_NARY_FUNCTION
index_int
nelements
=
result
.
get_shape
().
elements
();
index_int
nelements
=
result
.
get_shape
().
elements
();
hip_pointer_visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
hip_pointer_visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
output
[
i
]
=
f
(
inputs
[
i
]...);
});
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
__device__
{
output
[
i
]
=
f
(
inputs
[
i
]...);
});
});
});
}
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
View file @
3a848f0d
...
@@ -20,6 +20,15 @@ struct sum
...
@@ -20,6 +20,15 @@ struct sum
}
}
};
};
struct
product
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
auto
operator
()(
T
x
,
U
y
)
const
{
return
x
*
y
;
}
};
struct
id
struct
id
{
{
template
<
class
T
>
template
<
class
T
>
...
...
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
View file @
3a848f0d
...
@@ -39,7 +39,12 @@ constexpr void visit_tensor_size(index_int n, F f)
...
@@ -39,7 +39,12 @@ constexpr void visit_tensor_size(index_int n, F f)
f
(
std
::
integral_constant
<
index_int
,
5
>
{});
f
(
std
::
integral_constant
<
index_int
,
5
>
{});
break
;
break
;
}
}
default:
throw
std
::
runtime_error
(
"Unknown tensor size"
);
case
6
:
{
f
(
std
::
integral_constant
<
index_int
,
6
>
{});
break
;
}
default:
throw
std
::
runtime_error
(
"Tensor size dim out of range"
);
}
}
}
}
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
View file @
3a848f0d
...
@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument
...
@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument
auto
*
in_ptr
=
device_cast
(
input
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
__device__
{
const
size_t
nb
=
4
;
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_m
=
idx
[
dim_1
];
std
::
size_t
i_m
=
idx
[
dim_1
];
...
@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
...
@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
auto
*
in_ptr
=
device_cast
(
input
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
__device__
{
const
size_t
nb
=
4
;
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_n
=
idx
[
dim_1
];
std
::
size_t
i_n
=
idx
[
dim_1
];
...
...
src/targets/gpu/device/log.cpp
View file @
3a848f0d
...
@@ -9,7 +9,7 @@ namespace device {
...
@@ -9,7 +9,7 @@ namespace device {
void
log
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
log
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
log
(
to_hip_type
(
x
));
});
nary
(
stream
,
result
,
arg
)([](
auto
x
)
__device__
{
return
::
log
(
to_hip_type
(
x
));
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/logsoftmax.cpp
View file @
3a848f0d
...
@@ -11,11 +11,10 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -11,11 +11,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
logsoftmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int
axis
)
void
logsoftmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int
64_t
axis
)
{
{
auto
lens
=
result
.
get_shape
().
lens
();
auto
batch_lens
=
result
.
get_shape
().
lens
();
auto
batch_lens
=
lens
;
index_int
batch_item_num
=
batch_lens
[
axis
];
index_int
batch_item_num
=
lens
[
axis
];
batch_lens
[
axis
]
=
1
;
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
result
.
get_shape
().
type
(),
batch_lens
};
migraphx
::
shape
batch_shape
{
result
.
get_shape
().
type
(),
batch_lens
};
...
@@ -44,7 +43,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
...
@@ -44,7 +43,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
auto
log_batch_sum
=
::
log
(
to_hip_type
(
batch_sum
))
+
batch_max
;
auto
log_batch_sum
=
::
log
(
to_hip_type
(
batch_sum
))
+
batch_max
;
idx
.
local_stride
(
batch_item_num
,
[
&
](
auto
j
)
{
idx
.
local_stride
(
batch_item_num
,
[
&
](
auto
j
)
__device__
{
data_idx
[
axis
]
=
j
;
data_idx
[
axis
]
=
j
;
output
[
data_idx
]
=
input
[
data_idx
]
-
log_batch_sum
;
output
[
data_idx
]
=
input
[
data_idx
]
-
log_batch_sum
;
});
});
...
...
src/targets/gpu/device/max.cpp
View file @
3a848f0d
...
@@ -10,7 +10,7 @@ namespace device {
...
@@ -10,7 +10,7 @@ namespace device {
void
max
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
void
max
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
)(
nary
(
stream
,
result
,
arg1
,
arg2
)(
[](
auto
x
,
auto
y
)
{
return
std
::
max
(
to_hip_type
(
x
),
to_hip_type
(
y
));
});
[](
auto
x
,
auto
y
)
__device__
{
return
::
max
(
to_hip_type
(
x
),
to_hip_type
(
y
));
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/min.cpp
View file @
3a848f0d
...
@@ -10,7 +10,7 @@ namespace device {
...
@@ -10,7 +10,7 @@ namespace device {
void
min
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
void
min
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
)(
nary
(
stream
,
result
,
arg1
,
arg2
)(
[](
auto
x
,
auto
y
)
{
return
std
::
min
(
to_hip_type
(
x
),
to_hip_type
(
y
));
});
[](
auto
x
,
auto
y
)
__device__
{
return
::
min
(
to_hip_type
(
x
),
to_hip_type
(
y
));
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/mul.cpp
View file @
3a848f0d
...
@@ -8,7 +8,7 @@ namespace device {
...
@@ -8,7 +8,7 @@ namespace device {
void
mul
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
void
mul
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
)([](
auto
x
,
auto
y
)
{
return
x
*
y
;
});
nary
(
stream
,
result
,
arg1
,
arg2
)([](
auto
x
,
auto
y
)
__device__
{
return
x
*
y
;
});
}
}
void
mul
(
hipStream_t
stream
,
void
mul
(
hipStream_t
stream
,
...
@@ -17,7 +17,8 @@ void mul(hipStream_t stream,
...
@@ -17,7 +17,8 @@ void mul(hipStream_t stream,
const
argument
&
arg2
,
const
argument
&
arg2
,
const
argument
&
arg3
)
const
argument
&
arg3
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
,
arg3
)([](
auto
x
,
auto
y
,
auto
z
)
{
return
x
*
y
*
z
;
});
nary
(
stream
,
result
,
arg1
,
arg2
,
arg3
)([](
auto
x
,
auto
y
,
auto
z
)
__device__
{
return
x
*
y
*
z
;
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/mul_add.cpp
View file @
3a848f0d
...
@@ -12,7 +12,8 @@ void mul_add(hipStream_t stream,
...
@@ -12,7 +12,8 @@ void mul_add(hipStream_t stream,
const
argument
&
arg2
,
const
argument
&
arg2
,
const
argument
&
arg3
)
const
argument
&
arg3
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
,
arg3
)([](
auto
x
,
auto
a
,
auto
b
)
{
return
a
*
x
+
b
;
});
nary
(
stream
,
result
,
arg1
,
arg2
,
arg3
)([](
auto
x
,
auto
a
,
auto
b
)
__device__
{
return
a
*
x
+
b
;
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/mul_add_relu.cpp
View file @
3a848f0d
...
@@ -13,7 +13,7 @@ void mul_add_relu(hipStream_t stream,
...
@@ -13,7 +13,7 @@ void mul_add_relu(hipStream_t stream,
const
argument
&
arg3
)
const
argument
&
arg3
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
,
arg3
)(
nary
(
stream
,
result
,
arg1
,
arg2
,
arg3
)(
[](
auto
x
,
auto
a
,
auto
b
)
{
return
std
::
max
<
decltype
(
a
*
x
+
b
)
>
(
0
,
a
*
x
+
b
);
});
[](
auto
x
,
auto
a
,
auto
b
)
__device__
{
return
::
max
<
decltype
(
a
*
x
+
b
)
>
(
0
,
a
*
x
+
b
);
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/pad.cpp
View file @
3a848f0d
...
@@ -23,12 +23,12 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
...
@@ -23,12 +23,12 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
{
{
device_val
=
device_cast
(
std
::
numeric_limits
<
type
>::
lowest
());
device_val
=
device_cast
(
std
::
numeric_limits
<
type
>::
lowest
());
}
}
gs_launch
(
stream
,
gs_launch
(
stream
,
result
.
get_shape
().
elements
())(
result
.
get_shape
().
elements
())(
[
=
](
auto
i
)
{
output
.
data
()[
i
]
=
device_val
;
});
[
=
](
auto
i
)
__device__
{
output
.
data
()[
i
]
=
device_val
;
});
hip_index
offsets
;
hip_index
offsets
;
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
__device__
{
auto
idx
=
input
.
get_shape
().
multi
(
i
);
auto
idx
=
input
.
get_shape
().
multi
(
i
);
for
(
std
::
size_t
j
=
0
;
j
<
offsets
.
size
();
j
++
)
for
(
std
::
size_t
j
=
0
;
j
<
offsets
.
size
();
j
++
)
{
{
...
...
src/targets/gpu/device/pow.cpp
View file @
3a848f0d
...
@@ -9,7 +9,7 @@ namespace device {
...
@@ -9,7 +9,7 @@ namespace device {
void
pow
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
void
pow
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
)(
nary
(
stream
,
result
,
arg1
,
arg2
)(
[](
auto
b
,
auto
e
)
{
return
::
pow
(
to_hip_type
(
b
),
to_hip_type
(
e
));
});
[](
auto
b
,
auto
e
)
__device__
{
return
::
pow
(
to_hip_type
(
b
),
to_hip_type
(
e
));
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/prelu.cpp
0 → 100644
View file @
3a848f0d
#include <migraphx/gpu/device/prelu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
prelu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
nary
(
stream
,
result
,
arg1
,
arg2
)([](
auto
x
,
auto
slope
)
__device__
{
return
((
x
<
0
)
?
(
x
*
slope
)
:
x
);
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/reduce_prod.cpp
0 → 100644
View file @
3a848f0d
#include <migraphx/gpu/device/reduce_prod.hpp>
#include <migraphx/gpu/device/reduce.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
reduce_prod
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
reduce
(
stream
,
result
,
arg
,
product
{},
1
,
id
{},
id
{});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/relu.cpp
View file @
3a848f0d
...
@@ -8,7 +8,7 @@ namespace device {
...
@@ -8,7 +8,7 @@ namespace device {
void
relu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
relu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
std
::
max
<
decltype
(
x
)
>
(
0
,
x
);
});
nary
(
stream
,
result
,
arg
)([](
auto
x
)
__device__
{
return
::
max
<
decltype
(
x
)
>
(
0
,
x
);
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/round.cpp
View file @
3a848f0d
...
@@ -9,7 +9,7 @@ namespace device {
...
@@ -9,7 +9,7 @@ namespace device {
void
round
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
round
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
round
(
to_hip_type
(
x
));
});
nary
(
stream
,
result
,
arg
)([](
auto
x
)
__device__
{
return
::
round
(
to_hip_type
(
x
));
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/sigmoid.cpp
View file @
3a848f0d
...
@@ -9,7 +9,8 @@ namespace device {
...
@@ -9,7 +9,8 @@ namespace device {
void
sigmoid
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
sigmoid
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
1.
f
/
(
1.
f
+
::
exp
(
to_hip_type
(
-
x
)));
});
nary
(
stream
,
result
,
arg
)([](
auto
x
)
__device__
{
return
1.
f
/
(
1.
f
+
::
exp
(
to_hip_type
(
-
x
)));
});
}
}
}
// namespace device
}
// namespace device
...
...
Prev
1
2
3
4
5
6
7
8
9
…
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment