gaoqiong / composable_kernel_ROCM · Commits · 89adad76

Commit 89adad76, authored Jan 03, 2025 by Ville Pietilä
Parent: c46e02cc

    Add more logging.

Showing 5 changed files with 73 additions and 5 deletions (+73 -5)
  include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp                             +15 -0
  include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp  +6 -2
  include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp                                       +16 -0
  include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp                       +10 -2
  include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp                                          +26 -1
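All of the diagnostics added in this commit are gated behind ck::EnvIsEnabled(CK_ENV(CK_LOGGING)), so they only print when logging is enabled in the environment of the calling process. The sketch below shows the assumed shape of that guard; env_is_enabled() is a hypothetical stand-in for ck::EnvIsEnabled, and both the variable name CK_LOGGING (taken from the CK_ENV(CK_LOGGING) expression in the diff) and the "any non-empty, non-zero value enables it" rule are assumptions.

    // Hypothetical sketch of the guard used by the new log lines: output is printed only
    // when a CK_LOGGING-style environment variable is enabled in the calling process.
    // env_is_enabled() is an illustrative stand-in, not CK's ck::EnvIsEnabled.
    #include <cstdlib>
    #include <cstring>
    #include <iostream>

    static bool env_is_enabled(const char* name)
    {
        const char* value = std::getenv(name);
        return value != nullptr && std::strcmp(value, "") != 0 && std::strcmp(value, "0") != 0;
    }

    int main()
    {
        if(env_is_enabled("CK_LOGGING")) // variable name taken from CK_ENV(CK_LOGGING) in the diff
        {
            std::cout << "[ NotSupportedArgument ] example diagnostic" << std::endl;
        }
        return 0;
    }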
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp

@@ -672,6 +672,12 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
         if((ck::type_convert<ck::index_t>(arg.gemm_desc_kernel_arg_.size()) +
             arg.skipped_group_count_) != arg.group_count_)
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument] arg.skipped_group_count_ + arg.gemm_desc_kernel_arg_.size(): "
+                          << arg.skipped_group_count_ << " + " << arg.gemm_desc_kernel_arg_.size()
+                          << "!= group_count_: " << arg.group_count_ << std::endl;
+            }
             return false;
         }

@@ -691,6 +697,15 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
             }
             else
             {
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+                {
+                    std::cout << "[ NotSupportedArgument] Device properties check failed:" << std::endl;
+                    std::cout << "[ NotSupportedArgument] Device: " << ck::get_device_name() << std::endl;
+                    std::cout << "[ NotSupportedArgument] XDL support: "
+                              << std::to_string(ck::is_xdl_supported()) << std::endl;
+                    std::cout << "[ NotSupportedArgument] gfx103 support: "
+                              << std::to_string(ck::is_gfx103_supported()) << std::endl;
+                    std::cout << "[ NotSupportedArgument] gfx11 support: "
+                              << std::to_string(ck::is_gfx11_supported()) << std::endl;
+                    std::cout << "[ NotSupportedArgument] gfx12 support: "
+                              << std::to_string(ck::is_gfx12_supported()) << std::endl;
+                }
                 return false;
             }
         }
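The first hunk above logs a simple bookkeeping invariant of the grouped GEMM argument check: the number of kernel argument entries plus the number of skipped groups must add up to the requested group count. A self-contained sketch with made-up values, using hypothetical local names in place of arg.gemm_desc_kernel_arg_, arg.skipped_group_count_, and arg.group_count_:

    // Standalone illustration of the invariant the new message reports: every requested
    // group must either contribute a kernel argument or be counted as skipped, so
    // kernel args + skipped groups must equal the group count. Values are made up.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        const std::size_t group_count         = 4; // groups requested by the caller
        const std::size_t skipped_group_count = 1; // groups the device op did not build args for
        const std::vector<int> kernel_args(2);     // per-group kernel arguments actually built

        if(kernel_args.size() + skipped_group_count != group_count)
        {
            std::cout << "[ NotSupportedArgument] " << skipped_group_count << " + "
                      << kernel_args.size() << " != group_count_: " << group_count << std::endl;
        }
        return 0;
    }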
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp

@@ -820,6 +820,10 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
     {
         if(!ck::is_xdl_supported())
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument ] XDL is not supported." << std::endl;
+            }
             return false;
         }

@@ -828,8 +832,8 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
         {
             if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
-                std::cout << "The group count is not equal to sum of skipped groups "
-                             "and kernel args size!"
+                std::cout << "[ NotSupportedArgument ] The group count is not equal to sum of skipped groups "
+                             "and kernel args size!"
                           << std::endl;
             }
             return false;
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp

@@ -612,12 +612,22 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
     {
         if(!ck::is_xdl_supported())
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument ] XDL is not supported." << std::endl;
+            }
             return false;
         }
         if((ck::type_convert<ck::index_t>(arg.gemm_desc_kernel_arg_.size()) +
             arg.skipped_group_count_) != arg.group_count_)
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument ] The group count is not equal to sum of skipped groups "
+                          << "and kernel args size!" << std::endl;
+            }
             return false;
         }

@@ -641,6 +651,12 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
                 supported = supported & (b_vector_dim % BBlockTransferSrcScalarPerVector == 0);
             }
         }
+        if(!supported && ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+        {
+            std::cout << "[ NotSupportedArgument ] If padding is used, vector loads are not supported for dimensions "
+                      << "not divisible by vector load size." << std::endl;
+        }
         return supported;
     }
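The new message in this file spells out the padding constraint: when padding is enabled, any dimension that is loaded vector-wise must be divisible by the configured scalar-per-vector width. A minimal sketch of that divisibility test, with made-up numbers standing in for the real b_vector_dim and BBlockTransferSrcScalarPerVector:

    // Minimal sketch of the rule behind the new padding/vector-load message: a dimension
    // that is read ScalarPerVector elements at a time must be divisible by that vector
    // width, otherwise the argument is rejected. The concrete numbers are made up.
    #include <iostream>

    int main()
    {
        const int b_vector_dim                     = 130; // dimension read by B's block transfer
        const int BBlockTransferSrcScalarPerVector = 4;   // elements per vector load

        const bool supported = (b_vector_dim % BBlockTransferSrcScalarPerVector == 0);
        if(!supported)
        {
            std::cout << "[ NotSupportedArgument ] " << b_vector_dim << " is not divisible by "
                      << BBlockTransferSrcScalarPerVector << std::endl;
        }
        return supported ? 0 : 1;
    }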
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp

@@ -524,6 +524,10 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
     {
         if(!ck::is_xdl_supported())
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument ] XDL is not supported." << std::endl;
+            }
             return false;
         }

@@ -532,8 +536,8 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
         {
             if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
-                std::cout << "The group count is not equal to sum of skipped groups "
-                             "and kernel args size!"
+                std::cout << "[ NotSupportedArgument ] The group count is not equal to sum of skipped groups "
+                             "and kernel args size!"
                           << std::endl;
             }
             return false;

@@ -541,6 +545,10 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
         if(std::is_same_v<EDataType, ck::bhalf_t> && arg.K_BATCH > 1 &&
            !is_bf16_atomic_supported())
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument ] When BF16 atomic is not supported, K_BATCH must be unity."
+                          << std::endl;
+            }
             return false;
         }
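The last hunk above rejects split-K runs on bfloat16 outputs when the device lacks bf16 atomics. The sketch below restates that rule with hypothetical stand-ins for is_bf16_atomic_supported() and the argument struct; the rationale given in the comment (partial K-slices being accumulated atomically into the output) is an inference from the check itself, not something this diff states.

    // Sketch of the K_BATCH rule the new message describes, using hypothetical stand-ins.
    // Assumed rationale: split-K (K_BATCH > 1) accumulates partial results into the output
    // buffer, which needs atomic adds, so without bf16 atomics only K_BATCH == 1 is accepted.
    #include <iostream>

    static bool is_bf16_atomic_supported_stub() { return false; } // pretend the GPU lacks bf16 atomics

    static bool split_k_supported(bool output_is_bf16, int k_batch)
    {
        if(output_is_bf16 && k_batch > 1 && !is_bf16_atomic_supported_stub())
        {
            std::cout << "[ NotSupportedArgument ] When BF16 atomic is not supported, K_BATCH must be unity."
                      << std::endl;
            return false;
        }
        return true;
    }

    int main() { return split_k_supported(/*output_is_bf16=*/true, /*k_batch=*/4) ? 0 : 1; }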
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp

@@ -117,6 +117,16 @@ struct GridwiseGemmDlMultipleD_km_kn_mn
              b_grid_desc_k0_n_k1.GetElementSpaceSize() * sizeof(FloatAB) <= TwoGB &&
              c_grid_desc_m_n.GetElementSpaceSize() * sizeof(FloatC) <= TwoGB))
         {
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+            {
+                std::cout << "[ NotSupportedArgument ] Input matrices exceeded 2GB limitation:" << std::endl;
+                std::cout << "[ NotSupportedArgument ] A matrix: "
+                          << a_grid_desc_k0_m_k1.GetElementSpaceSize() * sizeof(FloatAB) << " bytes" << std::endl;
+                std::cout << "[ NotSupportedArgument ] B matrix: "
+                          << b_grid_desc_k0_n_k1.GetElementSpaceSize() * sizeof(FloatAB) << " bytes" << std::endl;
+                std::cout << "[ NotSupportedArgument ] C matrix: "
+                          << c_grid_desc_m_n.GetElementSpaceSize() * sizeof(FloatC) << " bytes" << std::endl;
+            }
             return false;
         }
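The hunk above logs the byte size of each matrix when the 2 GB descriptor limit is exceeded; the size is the descriptor's element-space size times the element size. A small worked example of that arithmetic, assuming TwoGB is 2^31 bytes and a 2-byte (fp16/bf16-sized) element; neither value is spelled out in the diff itself.

    // Worked example of the size check behind the new "exceeded 2GB" messages:
    // bytes per matrix = element-space size * sizeof(element type), compared against a
    // 2 GB limit. The 2^31 value for TwoGB and the 2-byte element are assumptions.
    #include <cstdint>
    #include <iostream>

    int main()
    {
        const std::int64_t TwoGB = std::int64_t{1} << 31; // assumed value of the TwoGB constant

        const std::int64_t M = 40000, K = 30000;          // hypothetical A-matrix extents
        const std::int64_t a_elements = M * K;            // element-space size of A
        const std::int64_t a_bytes =
            a_elements * static_cast<std::int64_t>(sizeof(std::uint16_t)); // 2-byte elements

        std::cout << "[ NotSupportedArgument ] A matrix: " << a_bytes << " bytes" << std::endl;
        if(!(a_bytes <= TwoGB)) // 2.4e9 bytes > 2^31, so this argument would be rejected
        {
            std::cout << "[ NotSupportedArgument ] Input matrices exceeded 2GB limitation:" << std::endl;
        }
        return 0;
    }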
@@ -126,11 +136,26 @@ struct GridwiseGemmDlMultipleD_km_kn_mn
         // TODO: also check validity of all components (blockwise-copy, threadwise-copy, etc)
-        return (M == c_grid_desc_m_n.GetLength(I0) && N == c_grid_desc_m_n.GetLength(I1) &&
-                K0 == b_grid_desc_k0_n_k1.GetLength(I0) &&
-                K1 == a_grid_desc_k0_m_k1.GetLength(I2) &&
-                K1 == b_grid_desc_k0_n_k1.GetLength(I2)) &&
-               (M % MPerBlock == 0 && N % NPerBlock == 0 && K0 % K0PerBlock == 0);
+        const bool validDims =
+            (M == c_grid_desc_m_n.GetLength(I0) && N == c_grid_desc_m_n.GetLength(I1) &&
+             K0 == b_grid_desc_k0_n_k1.GetLength(I0) &&
+             K1 == a_grid_desc_k0_m_k1.GetLength(I2) &&
+             K1 == b_grid_desc_k0_n_k1.GetLength(I2)) &&
+            (M % MPerBlock == 0 && N % NPerBlock == 0 && K0 % K0PerBlock == 0);
+
+        if(!validDims && ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
+        {
+            std::cout << "[ NotSupportedArgument ] Invalid dimension:" << std::endl;
+            std::cout << "[ NotSupportedArgument ] M: " << M << " N: " << N << " K0: " << K0
+                      << " K1: " << K1 << std::endl;
+            std::cout << "[ NotSupportedArgument ] MPerBlock: " << MPerBlock
+                      << " NPerBlock: " << NPerBlock << " K0PerBlock: " << K0PerBlock << std::endl;
+            std::cout << " [ NotSupportedArgument ] c_grid_desc_m_n (M): "
+                      << std::to_string(c_grid_desc_m_n.GetLength(I0)) << std::endl;
+            std::cout << " [ NotSupportedArgument ] c_grid_desc_m_n (N): "
+                      << std::to_string(c_grid_desc_m_n.GetLength(I1)) << std::endl;
+            std::cout << " [ NotSupportedArgument ] b_grid_desc_k0_n_k1 (K0): "
+                      << std::to_string(b_grid_desc_k0_n_k1.GetLength(I0)) << std::endl;
+            std::cout << " [ NotSupportedArgument ] a_grid_desc_k0_m_k1 (K1): "
+                      << std::to_string(a_grid_desc_k0_m_k1.GetLength(I2)) << std::endl;
+            std::cout << " [ NotSupportedArgument ] b_grid_desc_k0_n_k1 (K1): "
+                      << std::to_string(b_grid_desc_k0_n_k1.GetLength(I2)) << std::endl;
+        }
+        return validDims;
     }

     __host__ __device__ static constexpr index_t CalculateGridSize(index_t M, index_t N)