Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
c3a4652a
"include/vscode:/vscode.git/clone" did not exist on "7f09b8a0a47ddf08149fb6c08b6d0293780a2eb8"
Commit
c3a4652a
authored
Oct 15, 2024
by
Jing Zhang
Browse files
move packed into dynamic_buffer
parent
77ad000e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
6 deletions
+18
-6
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
...nsor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
+7
-4
include/ck/utility/dynamic_buffer.hpp
include/ck/utility/dynamic_buffer.hpp
+11
-2
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
View file @
c3a4652a
...
...
@@ -1318,14 +1318,17 @@ struct GridwiseGemm_xdl_cshuffle_v3
constexpr
auto
a_block_space_size_aligned
=
math
::
integer_least_multiple
(
a_block_desc_ak0_m_ak1
.
GetElementSpaceSize
(),
max_lds_align
);
constexpr
auto
b_block_space_size_aligned
=
math
::
integer_least_multiple
(
b_block_desc_bk0_n_bk1
.
GetElementSpaceSize
(),
max_lds_align
);
// Cast after lds
auto
a_block_buf
=
make_dynamic_buffer
<
AddressSpaceEnum
::
Lds
>
(
static_cast
<
ADataType
*>
(
p_shared
),
a_block_
desc_ak0_m_ak1
.
GetElementS
pace
S
ize
()
);
static_cast
<
ADataType
*>
(
p_shared
),
a_block_
s
pace
_s
ize
_aligned
);
auto
b_block_buf
=
make_dynamic_buffer
<
AddressSpaceEnum
::
Lds
>
(
reinterpret_cast
<
BDataType
*>
(
static_cast
<
ADataType
*>
(
p_shared
)
+
a_block_space_size_aligned
),
b_block_
desc_bk0_n_bk1
.
GetElementS
pace
S
ize
()
);
reinterpret_cast
<
BDataType
*>
(
static_cast
<
char
*>
(
p_shared
)
+
a_block_space_size_aligned
*
sizeof
(
ADataType
)
/
APackedSize
),
b_block_
s
pace
_s
ize
_aligned
);
constexpr
auto
a_block_slice_copy_step
=
make_multi_index
(
KPerBlock
/
AK1Number
,
0
,
0
);
constexpr
auto
b_block_slice_copy_step
=
make_multi_index
(
KPerBlock
/
BK1Number
,
0
,
0
);
...
...
include/ck/utility/dynamic_buffer.hpp
View file @
c3a4652a
...
...
@@ -76,19 +76,28 @@ struct DynamicBuffer
{
constexpr
index_t
t_per_x
=
scalar_per_x_vector
/
scalar_per_t_vector
;
constexpr
index_t
PackedSize
=
[]()
{
if
constexpr
(
is_same_v
<
remove_cvref_t
<
T
>
,
pk_i4_t
>
)
return
2
;
else
return
1
;
}();
//static_assert(element_space_size_ % PackedSize == 0, "");
if
constexpr
(
InvalidElementUseNumericalZeroValue
)
{
return
amd_buffer_load_invalid_element_return_zero
<
remove_cvref_t
<
T
>
,
t_per_x
,
coherence
>
(
p_data_
,
i
,
is_valid_element
,
element_space_size_
);
p_data_
,
i
,
is_valid_element
,
element_space_size_
/
PackedSize
);
}
else
{
return
amd_buffer_load_invalid_element_return_customized_value
<
remove_cvref_t
<
T
>
,
t_per_x
,
coherence
>
(
p_data_
,
i
,
is_valid_element
,
element_space_size_
,
invalid_element_value_
);
p_data_
,
i
,
is_valid_element
,
element_space_size_
/
PackedSize
,
invalid_element_value_
);
}
}
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment