Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
1814c9c1
Commit
1814c9c1
authored
May 05, 2023
by
Po-Yen, Chen
Browse files
Reduce more temlate params of kernel
parent
ea025f07
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
22 deletions
+11
-22
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp
...e/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp
+2
-12
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
+9
-10
No files found.
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp
View file @
1814c9c1
...
...
@@ -312,12 +312,7 @@ struct DeviceGemmXdl : public DeviceGemm<ALayout,
if
(
GridwiseGemm
::
CalculateHasMainKBlockLoop
(
K
))
{
const
auto
kernel
=
kernel_gemm_xdlops_v2r3
<
GridwiseGemm
,
ADataType
,
// TODO: distiguish A/B datatype
CDataType
,
Argument
,
true
>
;
const
auto
kernel
=
kernel_gemm_xdlops_v2r3
<
GridwiseGemm
,
Argument
,
true
>
;
ave_time
=
launch_and_time_kernel
(
stream_config
,
kernel
,
...
...
@@ -331,12 +326,7 @@ struct DeviceGemmXdl : public DeviceGemm<ALayout,
}
else
{
const
auto
kernel
=
kernel_gemm_xdlops_v2r3
<
GridwiseGemm
,
ADataType
,
// TODO: distiguish A/B datatype
CDataType
,
Argument
,
false
>
;
const
auto
kernel
=
kernel_gemm_xdlops_v2r3
<
GridwiseGemm
,
Argument
,
false
>
;
ave_time
=
launch_and_time_kernel
(
stream_config
,
kernel
,
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
View file @
1814c9c1
...
...
@@ -16,18 +16,14 @@
namespace
ck
{
template
<
typename
GridwiseGemm
,
typename
FloatAB
,
typename
FloatC
,
typename
Argument
,
bool
HasMainKBlockLoop
>
template
<
typename
GridwiseGemm
,
typename
Argument
,
bool
HasMainKBlockLoop
>
__global__
void
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
#endif
kernel_gemm_xdlops_v2r3
(
const
FloatAB
*
__restrict__
p_a_grid
,
const
FloatAB
*
__restrict__
p_b_grid
,
FloatC
*
__restrict__
p_c_grid
,
kernel_gemm_xdlops_v2r3
(
const
typename
GridwiseGemm
::
FloatAB
*
__restrict__
p_a_grid
,
const
typename
GridwiseGemm
::
FloatAB
*
__restrict__
p_b_grid
,
typename
GridwiseGemm
::
FloatC
*
__restrict__
p_c_grid
,
const
Argument
karg
)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
...
...
@@ -44,9 +40,9 @@ __global__ void
}
template
<
index_t
BlockSize
,
typename
FloatAB
,
typename
FloatAB
_
,
typename
FloatAcc
,
typename
FloatC
,
typename
FloatC
_
,
InMemoryDataOperationEnum
CGlobalMemoryDataOperation
,
typename
AGridDesc_K0_M_K1
,
typename
BGridDesc_K0_N_K1
,
...
...
@@ -98,6 +94,9 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
// K1 should be Number<...>
static
constexpr
auto
K1
=
Number
<
K1Value
>
{};
using
FloatAB
=
FloatAB_
;
using
FloatC
=
FloatC_
;
using
ThisThreadBlock
=
ThisThreadBlock
<
BlockSize
>
;
__host__
static
auto
CalculateGridSize
(
index_t
M
,
index_t
N
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment