Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ffa7e4be
Commit
ffa7e4be
authored
Sep 24, 2020
by
Chao Liu
Browse files
refactor
parent
fc7a6c85
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
92 additions
and
59 deletions
+92
-59
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
+48
-55
driver/include/device_dummy_dynamic_transform_v2.hpp
driver/include/device_dummy_dynamic_transform_v2.hpp
+44
-4
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
View file @
ffa7e4be
...
...
@@ -81,16 +81,15 @@ map_convolution_into_gemm_v2(const WeiDesc& wei_k_c_y_x_global_desc,
}
template
<
index_t
BlockSize
>
struct
DummyDynamicTransform_v2
struct
DummyDynamicTransform_v2
_1
{
template
<
typename
WeiDesc
,
typename
InDesc
,
typename
OutDesc
,
typename
TransformInDesc
>
template
<
typename
WeiDesc
,
typename
InDesc
,
typename
OutDesc
>
__device__
void
Run_1
(
index_t
*
const
__restrict__
p_wei_global
,
float
*
const
__restrict__
p_in_global
,
float
*
const
__restrict__
p_out_global
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
TransformInDesc
/* in_gemmk_gemmn_global_desc */
,
const
Array
<
index_t
,
2
>
conv_strides
,
const
Array
<
index_t
,
2
>
conv_dilations
,
const
Array
<
index_t
,
2
>
in_left_pads
,
...
...
@@ -131,14 +130,13 @@ struct DummyDynamicTransform_v2
}
}
template
<
typename
WeiDesc
,
typename
InDesc
,
typename
OutDesc
,
typename
TransformInDesc
>
template
<
typename
WeiDesc
,
typename
InDesc
,
typename
OutDesc
>
__device__
void
Run_2
(
index_t
*
const
__restrict__
p_wei_global
,
float
*
const
__restrict__
p_in_global
,
float
*
const
__restrict__
p_out_global
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
TransformInDesc
/* in_gemmk_gemmn_global_desc */
,
const
Array
<
index_t
,
2
>
conv_strides
,
const
Array
<
index_t
,
2
>
conv_dilations
,
const
Array
<
index_t
,
2
>
in_left_pads
,
...
...
@@ -251,18 +249,39 @@ struct DummyDynamicTransform_v2
#endif
}
template
<
typename
WeiDesc
,
typename
InDesc
,
typename
OutDesc
,
typename
TransformInDesc
>
__device__
void
Run
_3
(
index_t
*
const
__restrict__
p_wei_global
,
template
<
typename
WeiDesc
,
typename
InDesc
,
typename
OutDesc
>
__device__
void
Run
(
index_t
*
const
__restrict__
p_wei_global
,
float
*
const
__restrict__
p_in_global
,
float
*
const
__restrict__
p_out_global
,
const
WeiDesc
/* wei_k_c_y_x_global_desc */
,
const
InDesc
/* in_n_c_hi_wi_global_desc */
,
const
OutDesc
/* out_n_k_ho_wo_global_desc */
,
const
TransformInDesc
in_gemmk_gemmn_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
Array
<
index_t
,
2
>
conv_strides
,
const
Array
<
index_t
,
2
>
conv_dilations
,
const
Array
<
index_t
,
2
>
in_left_pads
,
const
Array
<
index_t
,
2
>
in_right_pads
)
const
{
Run_1
(
p_wei_global
,
p_in_global
,
p_out_global
,
wei_k_c_y_x_global_desc
,
in_n_c_hi_wi_global_desc
,
out_n_k_ho_wo_global_desc
,
conv_strides
,
conv_dilations
,
in_left_pads
,
in_right_pads
);
}
};
template
<
index_t
BlockSize
>
struct
DummyDynamicTransform_v2_2
{
template
<
typename
TransformInDesc
>
__device__
void
Run
(
index_t
*
const
__restrict__
p_wei_global
,
float
*
const
__restrict__
p_in_global
,
float
*
const
__restrict__
p_out_global
,
const
TransformInDesc
in_gemmk_gemmn_global_desc
)
const
{
MultiIndex
<
2
>
idx
;
...
...
@@ -309,32 +328,6 @@ struct DummyDynamicTransform_v2
p_out_global
[
in_gemmk_gemmn_global_desc
.
CalculateOffset
(
idx
)]
=
1
;
#endif
}
// Public entry point of the gridwise operation.
// Does no work of its own: every argument (the three global pointers, the
// four tensor descriptors and the four 2-element convolution parameter
// arrays) is handed to Run_1 unchanged and in the same order.
template <typename WeiDesc, typename InDesc, typename OutDesc, typename TransformInDesc>
__device__ void Run(index_t* const __restrict__ p_wei_global,
                    float* const __restrict__ p_in_global,
                    float* const __restrict__ p_out_global,
                    const WeiDesc wei_k_c_y_x_global_desc,
                    const InDesc in_n_c_hi_wi_global_desc,
                    const OutDesc out_n_k_ho_wo_global_desc,
                    const TransformInDesc in_gemmk_gemmn_global_desc,
                    const Array<index_t, 2> conv_strides,
                    const Array<index_t, 2> conv_dilations,
                    const Array<index_t, 2> in_left_pads,
                    const Array<index_t, 2> in_right_pads) const
{
    Run_1(p_wei_global,
          p_in_global,
          p_out_global,
          wei_k_c_y_x_global_desc,
          in_n_c_hi_wi_global_desc,
          out_n_k_ho_wo_global_desc,
          in_gemmk_gemmn_global_desc,
          conv_strides,
          conv_dilations,
          in_left_pads,
          in_right_pads);
}
};
}
// namespace ck
...
...
driver/include/device_dummy_dynamic_transform_v2.hpp
View file @
ffa7e4be
...
...
@@ -50,6 +50,26 @@ void device_dummy_dynamic_transform_v2(InDesc,
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
// test on cpu
// Host-side sanity check of the transformed descriptor: start a coordinate at
// index {0, 0}, then advance it ten times by the step {1, 0}, printing the
// multi-index and the offset reported by the coordinate before each move.
{
    auto coord =
        make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{{0, 0}});

    const auto coord_step =
        make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{{1, 0}});

    for(index_t i = 0; i < 10; ++i)
    {
        // Report the current position before stepping.
        printf("iter %d\n", i);
        print_array("idx: ", coord.GetIndex());
        printf("offset: %d\n", coord.GetOffset());
        printf("\n");

        // Advance the coordinate in place by one step.
        move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, coord, coord_step);
    }
}
std
::
size_t
data_sz
=
sizeof
(
T
);
DeviceMem
in_nchw_device_buf
(
data_sz
*
in_nchw
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_kcyx_device_buf
(
data_sz
*
wei_kcyx
.
mDesc
.
GetElementSpace
());
...
...
@@ -64,8 +84,6 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"%s: BlockSize %u, GridSize %u
\n
"
,
__func__
,
BlockSize
,
GridSize
);
using
dummy_transform
=
DummyDynamicTransform_v2
<
BlockSize
>
;
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
std
::
cout
<<
"Start running "
<<
nrepeat
<<
" times..."
<<
std
::
endl
;
...
...
@@ -75,14 +93,14 @@ void device_dummy_dynamic_transform_v2(InDesc,
for
(
index_t
j
=
0
;
j
<
nrepeat
;
++
j
)
{
launch_kernel
(
run_gridwise_operation
<
dummy_transform
,
#if 1
launch_kernel
(
run_gridwise_operation
<
DummyDynamicTransform_v2_1
<
BlockSize
>
,
index_t
*
const
,
float
*
const
,
float
*
const
,
const
decltype
(
wei_kcyx_desc
),
const
decltype
(
in_nchw_desc
),
const
decltype
(
out_nkhw_desc
),
const
decltype
(
in_gemmk_gemmn_global_desc
),
const
Array
<
index_t
,
2
>
,
const
Array
<
index_t
,
2
>
,
const
Array
<
index_t
,
2
>
,
...
...
@@ -97,11 +115,33 @@ void device_dummy_dynamic_transform_v2(InDesc,
wei_kcyx_desc
,
in_nchw_desc
,
out_nkhw_desc
,
conv_strides
,
conv_dilations
,
in_left_pads
,
in_right_pads
);
#else
// Alternative launch path: run DummyDynamicTransform_v2_2, whose Run() takes
// only the three global pointers and the already-transformed input
// descriptor. The types listed after the operation type, and the trailing
// runtime arguments, must mirror Run()'s parameter list one-to-one (as the
// other preprocessor branch does for DummyDynamicTransform_v2_1), so the
// convolution stride/dilation/pad arrays are deliberately NOT passed here:
// this Run() has no parameters for them and the launch would not compile
// once this branch is enabled.
launch_kernel(run_gridwise_operation<DummyDynamicTransform_v2_2<BlockSize>,
                                     index_t* const,
                                     float* const,
                                     float* const,
                                     const decltype(in_gemmk_gemmn_global_desc)>,
              dim3(GridSize),
              dim3(BlockSize),
              0,
              0,
              static_cast<index_t*>(wei_kcyx_device_buf.GetDeviceBuffer()),
              static_cast<float*>(in_nchw_device_buf.GetDeviceBuffer()),
              static_cast<float*>(out_nkhw_device_buf.GetDeviceBuffer()),
              in_gemmk_gemmn_global_desc);
#endif
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment