Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
afa241a8
You need to sign in or sign up before continuing.
Commit
afa241a8
authored
Oct 22, 2024
by
Andriy Roshchenko
Browse files
Improve GEMM example verbosity.
parent
807a4818
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
47 additions
and
30 deletions
+47
-30
example/01_gemm/gemm_xdl_bf16.cpp
example/01_gemm/gemm_xdl_bf16.cpp
+1
-1
example/01_gemm/gemm_xdl_bf16_rtn.cpp
example/01_gemm/gemm_xdl_bf16_rtn.cpp
+1
-1
example/01_gemm/gemm_xdl_bf16_v3.cpp
example/01_gemm/gemm_xdl_bf16_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16.cpp
example/01_gemm/gemm_xdl_fp16.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_fp8.cpp
example/01_gemm/gemm_xdl_fp16_fp8.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_fp8_v3.cpp
example/01_gemm/gemm_xdl_fp16_fp8_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_streamk_v3.cpp
example/01_gemm/gemm_xdl_fp16_streamk_v3.cpp
+4
-1
example/01_gemm/gemm_xdl_fp16_v2.cpp
example/01_gemm/gemm_xdl_fp16_v2.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_v3.cpp
example/01_gemm/gemm_xdl_fp16_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_fp64.cpp
example/01_gemm/gemm_xdl_fp64.cpp
+1
-1
example/01_gemm/gemm_xdl_fp8.cpp
example/01_gemm/gemm_xdl_fp8.cpp
+1
-1
example/01_gemm/gemm_xdl_fp8_bf8.cpp
example/01_gemm/gemm_xdl_fp8_bf8.cpp
+1
-1
example/01_gemm/gemm_xdl_fp8_v3.cpp
example/01_gemm/gemm_xdl_fp8_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_int8.cpp
example/01_gemm/gemm_xdl_int8.cpp
+1
-1
example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp
example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp
+1
-1
example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp
example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp
+1
-1
example/01_gemm/gemm_xdl_streamk.cpp
example/01_gemm/gemm_xdl_streamk.cpp
+1
-1
example/01_gemm/gemm_xdl_wavelet_fp16.cpp
example/01_gemm/gemm_xdl_wavelet_fp16.cpp
+1
-1
example/01_gemm/run_gemm_example_streamk_v2.inc
example/01_gemm/run_gemm_example_streamk_v2.inc
+13
-6
example/01_gemm/run_gemm_example_v2.inc
example/01_gemm/run_gemm_example_v2.inc
+13
-6
No files found.
example/01_gemm/gemm_xdl_bf16.cpp
View file @
afa241a8
...
...
@@ -49,4 +49,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_bf16_rtn.cpp
View file @
afa241a8
...
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_bf16_v3.cpp
View file @
afa241a8
...
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp16.cpp
View file @
afa241a8
...
...
@@ -60,4 +60,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp16_fp8.cpp
View file @
afa241a8
...
...
@@ -55,4 +55,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp16_fp8_v3.cpp
View file @
afa241a8
...
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp16_streamk_v3.cpp
View file @
afa241a8
...
...
@@ -45,4 +45,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_streamk_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_universal_streamk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_universal_streamk_example
(
argc
,
argv
))
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp16_v2.cpp
View file @
afa241a8
...
...
@@ -59,4 +59,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp16_v3.cpp
View file @
afa241a8
...
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp64.cpp
View file @
afa241a8
...
...
@@ -54,4 +54,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp8.cpp
View file @
afa241a8
...
...
@@ -53,4 +53,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp8_bf8.cpp
View file @
afa241a8
...
...
@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp8_v3.cpp
View file @
afa241a8
...
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_int8.cpp
View file @
afa241a8
...
...
@@ -46,4 +46,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp
View file @
afa241a8
...
...
@@ -66,4 +66,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp
View file @
afa241a8
...
...
@@ -65,4 +65,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_streamk.cpp
View file @
afa241a8
...
...
@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_streamk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_streamk_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_wavelet_fp16.cpp
View file @
afa241a8
...
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/run_gemm_example_streamk_v2.inc
View file @
afa241a8
...
...
@@ -243,6 +243,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool
pass
=
true
;
if
(
config
.
do_verification
)
{
std
::
cout
<<
"Compute reference GEMM on CPU... "
;
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
...
...
@@ -250,8 +251,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
ref_invoker
.
Run
(
ref_argument
);
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
std
::
cout
<<
"Compute GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
#ifdef BUILD_INT4_EXAMPLE
Tensor
<
CDataType
>
c_m_n_device_result_converted
(
c_m_n_host_result
.
mDesc
);
...
...
@@ -263,16 +267,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
pass
&=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
pass
=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
if
(
pass
)
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
#endif
}
if
(
config
.
time_kernel
)
{
std
::
cout
<<
"Time GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
std
::
size_t
flop
=
2_
uz
*
M
*
N
*
K
;
...
...
@@ -286,7 +293,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
}
return
pass
;
return
!
pass
;
}
bool
run_gemm_universal_streamk_example
(
int
argc
,
char
*
argv
[])
...
...
example/01_gemm/run_gemm_example_v2.inc
View file @
afa241a8
...
...
@@ -230,6 +230,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool
pass
=
true
;
if
(
config
.
do_verification
)
{
std
::
cout
<<
"Compute reference GEMM on CPU... "
;
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
...
...
@@ -237,8 +238,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
ref_invoker
.
Run
(
ref_argument
);
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
std
::
cout
<<
"Compute GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
#ifdef BUILD_INT4_EXAMPLE
Tensor
<
CDataType
>
c_m_n_device_result_converted
(
c_m_n_host_result
.
mDesc
);
...
...
@@ -250,16 +254,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
pass
&=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
pass
=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
if
(
pass
)
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
#endif
}
if
(
config
.
time_kernel
)
{
std
::
cout
<<
"Time GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
,
0
,
5
,
10
,
true
,
4
});
...
...
@@ -274,7 +281,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
}
return
pass
;
return
!
pass
;
}
bool
run_gemm_splitk_example
(
int
argc
,
char
*
argv
[])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment