Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
99bd2285
Commit
99bd2285
authored
Nov 05, 2023
by
Jing Zhang
Browse files
clean
parent
3644186f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
14 deletions
+12
-14
library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp
...include/ck/library/tensor_operation_instance/gpu/gemm.hpp
+8
-8
library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
.../device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp
.../device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp
+2
-3
No files found.
library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp
View file @
99bd2285
...
@@ -377,7 +377,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -377,7 +377,7 @@ struct DeviceOperationInstanceFactory<
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Row
>
&&
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Row
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(op_ptrs);
add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances
(
op_ptrs
);
#endif
#endif
...
@@ -386,7 +386,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -386,7 +386,7 @@ struct DeviceOperationInstanceFactory<
else
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Col
>
&&
else
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Col
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(op_ptrs);
add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances
(
op_ptrs
);
#endif
#endif
...
@@ -395,7 +395,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -395,7 +395,7 @@ struct DeviceOperationInstanceFactory<
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Row
>
&&
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Row
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(op_ptrs);
add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances
(
op_ptrs
);
#endif
#endif
...
@@ -404,7 +404,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -404,7 +404,7 @@ struct DeviceOperationInstanceFactory<
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Col
>
&&
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Col
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(op_ptrs);
add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances
(
op_ptrs
);
#endif
#endif
...
@@ -418,7 +418,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -418,7 +418,7 @@ struct DeviceOperationInstanceFactory<
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Row
>
&&
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Row
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(op_ptrs);
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f16_f16_f16_mk_kn_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_mk_kn_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instances
(
op_ptrs
);
...
@@ -430,7 +430,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -430,7 +430,7 @@ struct DeviceOperationInstanceFactory<
else
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Col
>
&&
else
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Col
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(op_ptrs);
add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f16_f16_f16_mk_nk_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_mk_nk_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instances
(
op_ptrs
);
...
@@ -443,7 +443,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -443,7 +443,7 @@ struct DeviceOperationInstanceFactory<
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Row
>
&&
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Row
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(op_ptrs);
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f16_f16_f16_km_kn_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_km_kn_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instances
(
op_ptrs
);
...
@@ -455,7 +455,7 @@ struct DeviceOperationInstanceFactory<
...
@@ -455,7 +455,7 @@ struct DeviceOperationInstanceFactory<
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Col
>
&&
else
if
constexpr
(
is_same_v
<
ALayout
,
Col
>
&&
is_same_v
<
BLayout
,
Col
>
&&
is_same_v
<
CLayout
,
Row
>
)
is_same_v
<
CLayout
,
Row
>
)
{
{
//
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(op_ptrs);
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances
(
op_ptrs
);
#ifdef DL_KERNELS
#ifdef DL_KERNELS
add_device_gemm_dl_f16_f16_f16_km_nk_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_km_nk_mn_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instances
(
op_ptrs
);
add_device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instances
(
op_ptrs
);
...
...
library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
View file @
99bd2285
...
@@ -26,9 +26,8 @@ using S = ck::Sequence<Is...>;
...
@@ -26,9 +26,8 @@ using S = ck::Sequence<Is...>;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
// static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
// static constexpr auto GemmMNPadding =
static
constexpr
auto
GemmMNPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNPadding
;
// ck::tensor_operation::device::GemmSpecialization::MNPadding;
static
constexpr
auto
GemmMNKPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
static
constexpr
auto
GemmMNKPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
using
device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_generic_instances
=
std
::
tuple
<
using
device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_generic_instances
=
std
::
tuple
<
...
...
library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp
View file @
99bd2285
...
@@ -27,9 +27,8 @@ using S = ck::Sequence<Is...>;
...
@@ -27,9 +27,8 @@ using S = ck::Sequence<Is...>;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
// static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
// static constexpr auto GemmMNPadding =
static
constexpr
auto
GemmMNPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNPadding
;
// ck::tensor_operation::device::GemmSpecialization::MNPadding;
static
constexpr
auto
GemmMNKPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
static
constexpr
auto
GemmMNKPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
using
device_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_generic_instances
=
std
::
tuple
<
using
device_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_generic_instances
=
std
::
tuple
<
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment