Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
1274861a
Unverified
Commit
1274861a
authored
May 17, 2024
by
Illia Silin
Committed by
GitHub
May 17, 2024
Browse files
replace the ENV macro with CK_ENV (#1296)
parent
6637a810
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
28 additions
and
28 deletions
+28
-28
include/ck/host_utility/flush_cache.hpp
include/ck/host_utility/flush_cache.hpp
+3
-3
include/ck/host_utility/kernel_launch.hpp
include/ck/host_utility/kernel_launch.hpp
+4
-4
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp
..._batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp
...u/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
...device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
...device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
..._fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
...nv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...e/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
.../gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
...u/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp
...gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp
...pu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
...de/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp
...ation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp
...on/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp
..._operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
...ion/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
...grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
+4
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp
...device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp
+1
-1
No files found.
include/ck/host_utility/flush_cache.hpp
View file @
1274861a
...
@@ -117,7 +117,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
...
@@ -117,7 +117,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
#define MEDIAN 1
#define MEDIAN 1
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
printf
(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u}
\n
"
,
printf
(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u}
\n
"
,
__func__
,
__func__
,
...
@@ -142,7 +142,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
...
@@ -142,7 +142,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
{
{
return
0.0
;
return
0.0
;
}
}
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
printf
(
"Start running %d times...
\n
"
,
nrepeat
);
printf
(
"Start running %d times...
\n
"
,
nrepeat
);
}
}
...
@@ -186,7 +186,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
...
@@ -186,7 +186,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
total_time
+=
cur_time
;
total_time
+=
cur_time
;
#endif
#endif
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"i: "
<<
i
<<
" cur_time: "
<<
cur_time
<<
std
::
endl
;
std
::
cout
<<
"i: "
<<
i
<<
" cur_time: "
<<
cur_time
<<
std
::
endl
;
...
...
include/ck/host_utility/kernel_launch.hpp
View file @
1274861a
...
@@ -20,7 +20,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
...
@@ -20,7 +20,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
#if CK_TIME_KERNEL
#if CK_TIME_KERNEL
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
printf
(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u}
\n
"
,
printf
(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u}
\n
"
,
__func__
,
__func__
,
...
@@ -41,7 +41,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
...
@@ -41,7 +41,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
}
}
const
int
nrepeat
=
stream_config
.
nrepeat_
;
const
int
nrepeat
=
stream_config
.
nrepeat_
;
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
printf
(
"Start running %d times...
\n
"
,
nrepeat
);
printf
(
"Start running %d times...
\n
"
,
nrepeat
);
}
}
...
@@ -95,7 +95,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
...
@@ -95,7 +95,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
#if CK_TIME_KERNEL
#if CK_TIME_KERNEL
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
printf
(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u}
\n
"
,
printf
(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u}
\n
"
,
__func__
,
__func__
,
...
@@ -117,7 +117,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
...
@@ -117,7 +117,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
}
}
const
int
nrepeat
=
stream_config
.
nrepeat_
;
const
int
nrepeat
=
stream_config
.
nrepeat_
;
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
printf
(
"Start running %d times...
\n
"
,
nrepeat
);
printf
(
"Start running %d times...
\n
"
,
nrepeat
);
}
}
...
...
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp
View file @
1274861a
...
@@ -587,7 +587,7 @@ struct DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle
...
@@ -587,7 +587,7 @@ struct DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle
BatchStrideD1s
,
BatchStrideD1s
,
BatchStrideE1
}
BatchStrideE1
}
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"a0_grid_desc_m_k_{"
<<
a0_grid_desc_m_k_
.
GetLength
(
I0
)
<<
", "
std
::
cout
<<
"a0_grid_desc_m_k_{"
<<
a0_grid_desc_m_k_
.
GetLength
(
I0
)
<<
", "
<<
a0_grid_desc_m_k_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
<<
a0_grid_desc_m_k_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
...
...
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp
View file @
1274861a
...
@@ -658,7 +658,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<0, ReduceO
...
@@ -658,7 +658,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<0, ReduceO
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
{
{
std
::
cout
<<
"arg.Batch_ = "
<<
arg
.
Batch_
<<
std
::
endl
;
std
::
cout
<<
"arg.Batch_ = "
<<
arg
.
Batch_
<<
std
::
endl
;
...
...
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
View file @
1274861a
...
@@ -719,7 +719,7 @@ struct DeviceBatchedGemmSoftmaxGemmPermute_Xdl_CShuffle
...
@@ -719,7 +719,7 @@ struct DeviceBatchedGemmSoftmaxGemmPermute_Xdl_CShuffle
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
arg
.
Print
();
arg
.
Print
();
}
}
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
View file @
1274861a
...
@@ -516,7 +516,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -516,7 +516,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
float
ave_time
=
0
;
float
ave_time
=
0
;
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_container_{"
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_container_{"
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
View file @
1274861a
...
@@ -644,7 +644,7 @@ struct
...
@@ -644,7 +644,7 @@ struct
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
DeviceOp
{}.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
DeviceOp
{}.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
"N "
<<
arg
.
Conv_N_
<<
", "
std
::
cout
<<
"N "
<<
arg
.
Conv_N_
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
View file @
1274861a
...
@@ -614,7 +614,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -614,7 +614,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
DeviceOp
{}.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
DeviceOp
{}.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
"N "
<<
arg
.
Conv_N_
<<
", "
std
::
cout
<<
"N "
<<
arg
.
Conv_N_
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
1274861a
...
@@ -579,7 +579,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -579,7 +579,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
DeviceOp
{}.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
DeviceOp
{}.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
"N "
<<
arg
.
Conv_N_
<<
", "
std
::
cout
<<
"N "
<<
arg
.
Conv_N_
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
1274861a
...
@@ -431,7 +431,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -431,7 +431,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I1
)
<<
", "
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I1
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
View file @
1274861a
...
@@ -401,7 +401,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -401,7 +401,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"num_batches_of_GEMM = "
<<
arg
.
num_subbatches_
<<
std
::
endl
;
std
::
cout
<<
"num_batches_of_GEMM = "
<<
arg
.
num_subbatches_
<<
std
::
endl
;
std
::
cout
<<
"a_grid_desc_k0_m_k1{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"a_grid_desc_k0_m_k1{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
...
include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp
View file @
1274861a
...
@@ -1272,7 +1272,7 @@ struct DeviceConvNdBwdDataNwcKxcNwk_Dl
...
@@ -1272,7 +1272,7 @@ struct DeviceConvNdBwdDataNwcKxcNwk_Dl
float
ave_time
=
0
;
float
ave_time
=
0
;
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_container_{"
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_container_{"
<<
arg
.
a_grid_desc_k0_m_k1_container_
[
i
].
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_container_
[
i
].
GetLength
(
I0
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp
View file @
1274861a
...
@@ -1220,7 +1220,7 @@ struct DeviceConvNdBwdDataNwcKxcNwk_Xdl
...
@@ -1220,7 +1220,7 @@ struct DeviceConvNdBwdDataNwcKxcNwk_Xdl
float
ave_time
=
0
;
float
ave_time
=
0
;
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1{"
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1{"
<<
arg
.
a_grid_desc_k0_m_k1_container_
[
i
].
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_container_
[
i
].
GetLength
(
I0
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
View file @
1274861a
...
@@ -334,7 +334,7 @@ struct DeviceGemmDl : public DeviceGemm<ALayout,
...
@@ -334,7 +334,7 @@ struct DeviceGemmDl : public DeviceGemm<ALayout,
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m0_m1_k1_{"
std
::
cout
<<
"arg.a_grid_desc_k0_m0_m1_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp
View file @
1274861a
...
@@ -510,7 +510,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<0, ReduceOperatio
...
@@ -510,7 +510,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<0, ReduceOperatio
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_ak0_m_ak1_{"
std
::
cout
<<
"arg.a_grid_desc_ak0_m_ak1_{"
<<
arg
.
a_grid_desc_ak0_m_ak1_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_ak0_m_ak1_
.
GetLength
(
I0
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp
View file @
1274861a
...
@@ -514,7 +514,7 @@ struct DeviceGemmLayerNorm_Xdl_CShuffle : public BaseOperator
...
@@ -514,7 +514,7 @@ struct DeviceGemmLayerNorm_Xdl_CShuffle : public BaseOperator
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_ak0_m_ak1_{"
std
::
cout
<<
"arg.a_grid_desc_ak0_m_ak1_{"
<<
arg
.
a_grid_desc_ak0_m_ak1_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_ak0_m_ak1_
.
GetLength
(
I0
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp
View file @
1274861a
...
@@ -299,7 +299,7 @@ struct DeviceGemmXdlSkipBLds : public DeviceGemm<ALayout,
...
@@ -299,7 +299,7 @@ struct DeviceGemmXdlSkipBLds : public DeviceGemm<ALayout,
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I1
)
<<
", "
<<
", "
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I1
)
<<
", "
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
View file @
1274861a
...
@@ -553,7 +553,7 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
...
@@ -553,7 +553,7 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
for
(
std
::
size_t
i
=
0
;
i
<
arg
.
gemm_desc_kernel_arg_
.
size
();
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
arg
.
gemm_desc_kernel_arg_
.
size
();
i
++
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"group: "
<<
i
<<
" arg.a_grid_desc_k0_m_k1_{"
std
::
cout
<<
"group: "
<<
i
<<
" arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
gemm_desc_kernel_arg_
[
i
].
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
<<
arg
.
gemm_desc_kernel_arg_
[
i
].
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
View file @
1274861a
...
@@ -468,7 +468,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
...
@@ -468,7 +468,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
gemm_kernel_args_
[
i
].
block_start_
=
block_start
;
gemm_kernel_args_
[
i
].
block_start_
=
block_start
;
gemm_kernel_args_
[
i
].
block_end_
=
block_end
;
gemm_kernel_args_
[
i
].
block_end_
=
block_end
;
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
index_t
tiles
=
(
block_end
-
block_start
)
/
K_BATCH
;
index_t
tiles
=
(
block_end
-
block_start
)
/
K_BATCH
;
std
::
cout
<<
"block_start: "
<<
block_start
<<
"
\n
"
std
::
cout
<<
"block_start: "
<<
block_start
<<
"
\n
"
...
@@ -495,7 +495,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
...
@@ -495,7 +495,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
arg
.
karg_
.
p_c_grid
=
p_workspace
+
offset
;
arg
.
karg_
.
p_c_grid
=
p_workspace
+
offset
;
index_t
tiles
=
(
arg
.
block_end_
-
arg
.
block_start_
)
/
arg
.
karg_
.
k_batch
;
index_t
tiles
=
(
arg
.
block_end_
-
arg
.
block_start_
)
/
arg
.
karg_
.
k_batch
;
offset
+=
tiles
*
MPerBlock
*
NPerBlock
;
offset
+=
tiles
*
MPerBlock
*
NPerBlock
;
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"block_start: "
<<
arg
.
block_start_
<<
"
\n
"
std
::
cout
<<
"block_start: "
<<
arg
.
block_start_
<<
"
\n
"
<<
"block_end: "
<<
arg
.
block_end_
<<
"
\n
"
<<
"block_end: "
<<
arg
.
block_end_
<<
"
\n
"
...
@@ -819,7 +819,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
...
@@ -819,7 +819,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
if
((
ck
::
type_convert
<
ck
::
index_t
>
(
arg
.
gemm_kernel_args_
.
size
())
+
if
((
ck
::
type_convert
<
ck
::
index_t
>
(
arg
.
gemm_kernel_args_
.
size
())
+
arg
.
skipped_group_count_
)
!=
arg
.
group_count_
)
arg
.
skipped_group_count_
)
!=
arg
.
group_count_
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"The group count is not equal to sum of skipped groups "
std
::
cout
<<
"The group count is not equal to sum of skipped groups "
"and kernel args size!"
"and kernel args size!"
...
@@ -836,7 +836,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
...
@@ -836,7 +836,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
bool
group_arg_valid
=
GridwiseGemm
::
CheckValidity
(
gemm_arg
);
bool
group_arg_valid
=
GridwiseGemm
::
CheckValidity
(
gemm_arg
);
if
(
not
group_arg_valid
)
if
(
not
group_arg_valid
)
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"["
<<
__func__
<<
"] group id: "
<<
i
std
::
cout
<<
"["
<<
__func__
<<
"] group id: "
<<
i
<<
" has invalid GridwiseGemm settings!"
<<
std
::
endl
;
<<
" has invalid GridwiseGemm settings!"
<<
std
::
endl
;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp
View file @
1274861a
...
@@ -620,7 +620,7 @@ struct DeviceGroupedGemmMultipleDXdlCShuffleTileLoop
...
@@ -620,7 +620,7 @@ struct DeviceGroupedGemmMultipleDXdlCShuffleTileLoop
GridwiseGemm
::
template
CheckTensorTransfersValidity
<
ALayout
,
BLayout
,
ELayout
>(
GridwiseGemm
::
template
CheckTensorTransfersValidity
<
ALayout
,
BLayout
,
ELayout
>(
M
,
N
,
K
)))
M
,
N
,
K
)))
{
{
if
(
ck
::
EnvIsEnabled
(
ENV
(
CK_LOGGING
)))
if
(
ck
::
EnvIsEnabled
(
CK_
ENV
(
CK_LOGGING
)))
{
{
std
::
cout
<<
"The provided GEMM problem size (M,N,K) ["
<<
M
<<
","
<<
N
<<
","
std
::
cout
<<
"The provided GEMM problem size (M,N,K) ["
<<
M
<<
","
<<
N
<<
","
<<
K
<<
"] are not supported by current template parameters!"
<<
K
<<
"] are not supported by current template parameters!"
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment