Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
905f5a3f
Commit
905f5a3f
authored
Apr 23, 2021
by
Chao Liu
Browse files
updating v5r1
parent
474733b5
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
42 deletions
+4
-42
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
...nel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
+4
-33
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_set.hpp
.../tensor_operation/threadwise_dynamic_tensor_slice_set.hpp
+0
-3
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
...or_operation/threadwise_dynamic_tensor_slice_transfer.hpp
+0
-6
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
View file @
905f5a3f
...
...
@@ -679,23 +679,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
FloatAB
*
p_a_block
=
p_shared_block
;
#if 0
// register allocation for output
FloatAcc p_c_thread[c_k_n_ho_wo_thread_desc.GetElementSpaceSize()];
// zero out threadwise output
threadwise_matrix_set_zero_v3(c_k_n_ho_wo_thread_desc, p_c_thread);
#elif
0
// register allocation for output
FloatAcc
p_c_thread
[
c_k_n_ho_wo_thread_desc
.
GetElementSpaceSize
()];
auto
c_thread_buf
=
make_dynamic_buffer
(
p_c_thread
);
ThreadwiseDynamicTensorSliceSet_v1
<
FloatAcc
,
decltype
(
c_k_n_ho_wo_thread_desc
),
Sequence
<
KPerThread
,
1
,
HoPerThread
,
WoPerThread
>>
{}
.
Run
(
c_k_n_ho_wo_thread_desc
,
make_tuple
(
I0
,
I0
,
I0
,
I0
),
c_thread_buf
,
FloatAcc
{
0
});
#elif 1
// register allocation for output
StaticBuffer
<
FloatAcc
,
c_k_n_ho_wo_thread_desc
.
GetElementSpaceSize
()
>
c_thread_buf
;
...
...
@@ -703,7 +686,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
decltype
(
c_k_n_ho_wo_thread_desc
),
Sequence
<
KPerThread
,
1
,
HoPerThread
,
WoPerThread
>>
{}
.
Run
(
c_k_n_ho_wo_thread_desc
,
make_tuple
(
I0
,
I0
,
I0
,
I0
),
c_thread_buf
,
FloatAcc
{
0
});
#endif
constexpr
auto
b_thread_slice_copy_step
=
make_multi_index
(
EPerBlock
,
0
,
0
,
0
);
...
...
@@ -718,21 +700,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
constexpr
auto
b_e_n_ho_wo_global_move_slice_window_iterator_hack
=
BGlobalMoveSliceWindowIteratorHacks
{};
#if 0
constexpr auto b_thread_space_size = b_e_n_ho_wo_thread_desc.GetElementSpaceSize();
FloatAB p_b_thread[b_thread_space_size * 2];
FloatAB* p_b_thread_double = p_b_thread;
#elif
1
constexpr
auto
b_thread_space_size
=
b_e_n_ho_wo_thread_desc
.
GetElementSpaceSize
();
FloatAB
p_b_thread
[
b_thread_space_size
*
2
];
auto
b_thread_even_buf
=
make_dynamic_buffer
(
p_b_thread
);
auto
b_thread_odd_buf
=
make_dynamic_buffer
(
p_b_thread
+
b_thread_space_size
);
#else
StaticBuffer
<
FloatAB
,
b_e_n_ho_wo_thread_desc
.
GetElementSpaceSize
()
>
a_thread_even_buf
,
a_thread_odd_buf
;
#endif
// double regsiter buffer for b
StaticBuffer
<
FloatAB
,
b_e_n_ho_wo_thread_desc
.
GetElementSpaceSize
()
>
b_thread_even_buf
,
b_thread_odd_buf
;
// LDS double buffer: preload data into LDS
{
...
...
@@ -770,6 +740,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
b_e_n_ho_wo_global_iterator_hacks
);
// LDS double buffer: GEMM on current data
// TODO: @Zhang Jing: blockwise gemm should be able to move slice window
blockwise_gemm
.
Run
(
make_dynamic_buffer
(
p_a_block
+
a_e_k_block_desc
.
CalculateOffset
(
make_tuple
(
b_block_data_begin
,
0
))),
...
...
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_set.hpp
View file @
905f5a3f
...
...
@@ -28,10 +28,7 @@ struct ThreadwiseDynamicTensorSliceSet_v1
static_assert
(
Desc
::
IsKnownAtCompileTime
(),
"wrong! SrcDesc and DstDesc need to known at compile-time"
);
#if 0
// TODO: turn this on when v5r1 is update to get rid of array
static_assert
(
Buffer
::
IsStaticBuffer
(),
"wrong! DstBuffer need to be StaticBuffer"
);
#endif
static_assert
(
is_known_at_compile_time
<
remove_cv_t
<
remove_reference_t
<
OriginIdx
>>>::
value
,
"wrong! OriginIdx need to be known at compile-time"
);
...
...
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
View file @
905f5a3f
...
...
@@ -98,14 +98,11 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
is_known_at_compile_time
<
remove_cv_t
<
remove_reference_t
<
SrcSliceOriginIdx
>>>::
value
,
"wrong! SrcSliceOrigin need to known at compile-time"
);
#if 0 // debug
// TODO: turn this on, once v5r1 is updated to use StaticBuffer for holding C data
static_assert
(
SrcBuffer
::
IsStaticBuffer
(),
"wrong! SrcBuffer need to be StaticBuffer"
);
static_assert
(
is_same
<
remove_cv_t
<
remove_reference_t
<
typename
SrcBuffer
::
type
>>
,
remove_cv_t
<
remove_reference_t
<
SrcData
>>>::
value
,
"wrong! SrcBuffer data type is wrong"
);
#endif
// SrcDesc and src_slice_origin_idx are known at compile-time
constexpr
auto
src_desc
=
remove_cv_t
<
remove_reference_t
<
SrcDesc
>>
{};
...
...
@@ -1403,10 +1400,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v4
remove_cv_t
<
remove_reference_t
<
DstData
>>>::
value
,
"wrong! SrcBuffer or DstBuffer data type is wrong"
);
#if 0
// turn this on after v5r1 is updated
static_assert
(
DstBuffer
::
IsStaticBuffer
(),
"wrong! DstBuffer need to be StaticBuffer"
);
#endif
static_assert
(
is_known_at_compile_time
<
remove_cv_t
<
remove_reference_t
<
SrcRefToOriginDisplacement
>>>::
value
&&
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment