Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
363b6744
Commit
363b6744
authored
Jan 14, 2025
by
mtgu0705
Browse files
add instance for gemm_ab_scale
parent
9dac9713
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
237 additions
and
218 deletions
+237
-218
library/include/ck/library/tensor_operation_instance/gpu/gemm_ab_scale.hpp
...k/library/tensor_operation_instance/gpu/gemm_ab_scale.hpp
+23
-23
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128.hpp
...ice_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128.hpp
+21
-21
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp
...f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp
+3
-3
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp
...8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp
+3
-3
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnkpadding_instance.cpp
...f8_bf16_mk_nk_mn_128_128_128_comp_mnkpadding_instance.cpp
+3
-3
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnpadding_instance.cpp
..._f8_bf16_mk_nk_mn_128_128_128_comp_mnpadding_instance.cpp
+3
-3
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp
..._f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp
+3
-3
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp
...f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp
+3
-3
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_mnkpadding_instance.cpp
..._bf16_mk_nk_mn_128_128_128_mem_v1_mnkpadding_instance.cpp
+3
-3
profiler/src/CMakeLists.txt
profiler/src/CMakeLists.txt
+151
-151
profiler/src/profile_gemm_ab_scale.cpp
profiler/src/profile_gemm_ab_scale.cpp
+21
-2
No files found.
library/include/ck/library/tensor_operation_instance/gpu/gemm_ab_scale.hpp
View file @
363b6744
...
...
@@ -17,7 +17,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
#if(defined(CK_ENABLE_BF16) || defined(CK_ENABLE_FP8))
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_default_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_default_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -28,14 +28,14 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_i
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_kpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_kpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -46,14 +46,14 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_mnpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_mnpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -64,14 +64,14 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnpadding
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_mnkpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_mnkpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -82,14 +82,14 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnkpaddin
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_default_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_default_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -100,14 +100,14 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_kpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_kpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -118,14 +118,14 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpaddin
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_mnkpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_mnkpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -136,7 +136,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_mnkpadd
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -163,7 +163,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGemmMu
B1DataType
,
Tuple
<>
,
CDataType
,
1
28
,
1
,
128
,
128
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
...
...
@@ -180,7 +180,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGemmMu
B1DataType
,
Tuple
<>
,
CDataType
,
1
28
,
1
,
128
,
128
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
...
...
@@ -198,20 +198,20 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGemmMu
if
constexpr
(
is_same_v
<
ALayout
,
Row
>
&&
is_same_v
<
BLayout
,
Col
>
&&
is_same_v
<
CLayout
,
Row
>
)
{
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_default_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_default_instances
(
op_ptrs
);
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_kpadding_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_kpadding_instances
(
op_ptrs
);
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_mnpadding_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_mnpadding_instances
(
op_ptrs
);
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_mnkpadding_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_mnkpadding_instances
(
op_ptrs
);
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_default_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_default_instances
(
op_ptrs
);
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_kpadding_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_kpadding_instances
(
op_ptrs
);
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_mnkpadding_instances
(
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_mnkpadding_instances
(
op_ptrs
);
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128.hpp
View file @
363b6744
This diff is collapsed.
Click to expand it.
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_default_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_default_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_i
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_i
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_instances
<
GemmDefault
>
{});
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_instances
<
GemmDefault
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_kpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_kpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_instances
<
GemmKPadding
>
{});
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_instances
<
GemmKPadding
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnkpadding_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_mnkpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_mnkpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnkpaddin
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnkpaddin
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_instances
<
GemmMNKPadding
>
{});
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_instances
<
GemmMNKPadding
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnpadding_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_mnpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_mnpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnpadding
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_mnpadding
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_comp_instances
<
GemmMNPadding
>
{});
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_comp_instances
<
GemmMNPadding
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_default_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_default_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_instances
<
Intrawave
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_instances
<
Intrawave
,
GemmDefault
>
{});
}
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_kpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_kpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpaddin
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpaddin
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_instances
<
Intrawave
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_instances
<
Intrawave
,
GemmKPadding
>
{});
}
...
...
library/src/tensor_operation_instance/gpu/gemm_ab_scale/device_gemm_ab_scale_xdl_f8_f8_bf16/device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_mnkpadding_instance.cpp
View file @
363b6744
...
...
@@ -8,7 +8,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_v1_mnkpadding_instances
(
void
add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_v1_mnkpadding_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGemmMultipleD_ABScale
<
Row
,
Col
,
Tuple
<>
,
...
...
@@ -19,7 +19,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_mnkpadd
F32
,
Tuple
<>
,
BF16
,
1
28
,
1
,
128
,
128
,
PassThrough
,
...
...
@@ -28,7 +28,7 @@ void add_device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_mnkpadd
{
add_device_operation_instances
(
instances
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1
28
_128_128_mem_instances
<
Intrawave
,
device_gemm_ab_scale_xdl_f8_f8_bf16_mk_nk_mn_1_128_128_mem_instances
<
Intrawave
,
GemmMNKPadding
>
{});
}
...
...
profiler/src/CMakeLists.txt
View file @
363b6744
This diff is collapsed.
Click to expand it.
profiler/src/profile_gemm_ab_scale.cpp
View file @
363b6744
...
...
@@ -32,8 +32,10 @@ enum struct GemmDataType
/// Identifies which scale-block tile configuration the profiler dispatches on.
///
/// Each enumerator name carries three tile extents; the dispatch code in this
/// file forwards the matching extents to `profile(...)` as `ck::Number<>`
/// values. NOTE(review): the extents are presumably ordered (M, N, K) —
/// confirm against the device-op template parameters.
///
/// The numeric values are spelled out explicitly and equal the values the
/// enumerators would receive implicitly (0, 1).
enum struct ScaleBlockTile
{
    Tile_128_128_128 = 0, ///< tile extents 128 x 128 x 128
    Tile_1_128_128   = 1, ///< tile extents 1 x 128 x 128
};
#define OP_NAME "gemm_ab_scale"
#define OP_DESC "GEMM_AB_Scale"
...
...
@@ -154,8 +156,25 @@ int profile_gemm_ab_scale(int argc, char* argv[])
return
pass
?
0
:
1
;
};
// if(data_type == GemmDataType::F8_F8_BF16 && layout == GemmMatrixLayout::MK_NK_MN &&
// scale_block_tile == ScaleBlockTile::Tile_128_128_128)
// {
// return profile(F8{},
// F32{},
// F8{},
// F32{},
// F8{},
// F32{},
// BF16{},
// ck::Number<128>{},
// ck::Number<128>{},
// ck::Number<128>{},
// Row{},
// Col{},
// Row{});
// }
if
(
data_type
==
GemmDataType
::
F8_F8_BF16
&&
layout
==
GemmMatrixLayout
::
MK_NK_MN
&&
scale_block_tile
==
ScaleBlockTile
::
Tile_1
28
_128_128
)
scale_block_tile
==
ScaleBlockTile
::
Tile_1_128_128
)
{
return
profile
(
F8
{},
F32
{},
...
...
@@ -164,7 +183,7 @@ int profile_gemm_ab_scale(int argc, char* argv[])
F8
{},
F32
{},
BF16
{},
ck
::
Number
<
1
28
>
{},
ck
::
Number
<
1
>
{},
ck
::
Number
<
128
>
{},
ck
::
Number
<
128
>
{},
Row
{},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment