Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a69937d3
Commit
a69937d3
authored
Oct 14, 2021
by
Jing Zhang
Browse files
add maxpool host for validation
parent
ec381569
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
7 deletions
+26
-7
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2_add.hpp
...l/include/tensor_operation/gridwise_gemm_dlops_v2_add.hpp
+7
-7
host/host_tensor/include/host_conv.hpp
host/host_tensor/include/host_conv.hpp
+19
-0
No files found.
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2_add.hpp
View file @
a69937d3
...
...
@@ -976,7 +976,7 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
}
#endif
#if
0
#if
1
// Resize_Add
if
constexpr
(
add_type
==
0
)
{
...
...
@@ -1137,11 +1137,11 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
make_tuple
(
ki
,
0
,
hi
*
2
+
1
,
wi
*
2
+
1
));
d_thread_buf
(
Number
<
d_offset
>
{})
=
c_thread_buf
[
Number
<
c_offset_0
>
{}];
d_thread_buf
(
Number
<
d_offset
>
{})
=
max
(
c_thread_buf
[
Number
<
c_offset_1
>
{}],
d_thread_buf
(
Number
<
d_offset
>
{})
=
f
max
f
(
c_thread_buf
[
Number
<
c_offset_1
>
{}],
d_thread_buf
(
Number
<
d_offset
>
{}));
d_thread_buf
(
Number
<
d_offset
>
{})
=
max
(
c_thread_buf
[
Number
<
c_offset_2
>
{}],
d_thread_buf
(
Number
<
d_offset
>
{})
=
f
max
f
(
c_thread_buf
[
Number
<
c_offset_2
>
{}],
d_thread_buf
(
Number
<
d_offset
>
{}));
d_thread_buf
(
Number
<
d_offset
>
{})
=
max
(
c_thread_buf
[
Number
<
c_offset_3
>
{}],
d_thread_buf
(
Number
<
d_offset
>
{})
=
f
max
(
c_thread_buf
[
Number
<
c_offset_3
>
{}],
d_thread_buf
(
Number
<
d_offset
>
{}));
});
});
...
...
host/host_tensor/include/host_conv.hpp
View file @
a69937d3
...
...
@@ -284,6 +284,25 @@ void host_direct_convolution_maxpool_nchwc(const Tensor<TIn>& in,
out_host
.
mDesc
.
GetLengths
()[
2
],
out_host
.
mDesc
.
GetLengths
()[
3
],
out_host
.
mDesc
.
GetLengths
()[
4
])(
std
::
thread
::
hardware_concurrency
());
// Host-side 2x2 max-pooling step (added for validation of the device result).
// For every output index (n, k0, ho, wo, k1) the functor reads the four out_host
// entries at dim-2 indices {2*ho, 2*ho + 1} and dim-3 indices {2*wo, 2*wo + 1},
// and writes the largest of them to max_host(n, k0, ho, wo, k1).
// NOTE(review): there is no bounds check here; this presumes dims 2 and 3 of
// out_host are at least twice those of max_host -- confirm against the caller.
auto maxpool_nchw = [&](auto n, auto k0, auto ho, auto wo, auto k1) {
    // First corner of the 2x2 window in out_host that feeds output element (ho, wo).
    const auto h_base = ho * 2;
    const auto w_base = wo * 2;

    // Window values, read in the order (0,0), (0,1), (1,0), (1,1).
    const auto win_00 = out_host(n, k0, h_base, w_base, k1);
    const auto win_01 = out_host(n, k0, h_base, w_base + 1, k1);
    const auto win_10 = out_host(n, k0, h_base + 1, w_base, k1);
    const auto win_11 = out_host(n, k0, h_base + 1, w_base + 1, k1);

    max_host(n, k0, ho, wo, k1) = std::max({win_00, win_01, win_10, win_11});
};

// Apply the pooling functor across all five dimensions of max_host.
// The trailing call argument is std::thread::hardware_concurrency(); presumably
// it selects the number of worker threads -- verify in make_ParallelTensorFunctor.
make_ParallelTensorFunctor(maxpool_nchw,
                           max_host.mDesc.GetLengths()[0],
                           max_host.mDesc.GetLengths()[1],
                           max_host.mDesc.GetLengths()[2],
                           max_host.mDesc.GetLengths()[3],
                           max_host.mDesc.GetLengths()[4])(std::thread::hardware_concurrency());
}
template
<
typename
TIn
,
typename
TWei
,
typename
TOut
,
typename
InLeftPads
,
typename
InRightPads
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment