Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
afa241a8
Commit
afa241a8
authored
Oct 22, 2024
by
Andriy Roshchenko
Browse files
Improve GEMM example verbosity.
parent
807a4818
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
47 additions
and
30 deletions
+47
-30
example/01_gemm/gemm_xdl_bf16.cpp
example/01_gemm/gemm_xdl_bf16.cpp
+1
-1
example/01_gemm/gemm_xdl_bf16_rtn.cpp
example/01_gemm/gemm_xdl_bf16_rtn.cpp
+1
-1
example/01_gemm/gemm_xdl_bf16_v3.cpp
example/01_gemm/gemm_xdl_bf16_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16.cpp
example/01_gemm/gemm_xdl_fp16.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_fp8.cpp
example/01_gemm/gemm_xdl_fp16_fp8.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_fp8_v3.cpp
example/01_gemm/gemm_xdl_fp16_fp8_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_streamk_v3.cpp
example/01_gemm/gemm_xdl_fp16_streamk_v3.cpp
+4
-1
example/01_gemm/gemm_xdl_fp16_v2.cpp
example/01_gemm/gemm_xdl_fp16_v2.cpp
+1
-1
example/01_gemm/gemm_xdl_fp16_v3.cpp
example/01_gemm/gemm_xdl_fp16_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_fp64.cpp
example/01_gemm/gemm_xdl_fp64.cpp
+1
-1
example/01_gemm/gemm_xdl_fp8.cpp
example/01_gemm/gemm_xdl_fp8.cpp
+1
-1
example/01_gemm/gemm_xdl_fp8_bf8.cpp
example/01_gemm/gemm_xdl_fp8_bf8.cpp
+1
-1
example/01_gemm/gemm_xdl_fp8_v3.cpp
example/01_gemm/gemm_xdl_fp8_v3.cpp
+1
-1
example/01_gemm/gemm_xdl_int8.cpp
example/01_gemm/gemm_xdl_int8.cpp
+1
-1
example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp
example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp
+1
-1
example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp
example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp
+1
-1
example/01_gemm/gemm_xdl_streamk.cpp
example/01_gemm/gemm_xdl_streamk.cpp
+1
-1
example/01_gemm/gemm_xdl_wavelet_fp16.cpp
example/01_gemm/gemm_xdl_wavelet_fp16.cpp
+1
-1
example/01_gemm/run_gemm_example_streamk_v2.inc
example/01_gemm/run_gemm_example_streamk_v2.inc
+13
-6
example/01_gemm/run_gemm_example_v2.inc
example/01_gemm/run_gemm_example_v2.inc
+13
-6
No files found.
example/01_gemm/gemm_xdl_bf16.cpp
View file @
afa241a8
...
@@ -49,4 +49,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -49,4 +49,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_bf16_rtn.cpp
View file @
afa241a8
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_bf16_v3.cpp
View file @
afa241a8
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc"
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp16.cpp
View file @
afa241a8
...
@@ -60,4 +60,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -60,4 +60,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp16_fp8.cpp
View file @
afa241a8
...
@@ -55,4 +55,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -55,4 +55,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp16_fp8_v3.cpp
View file @
afa241a8
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
#include "run_gemm_example_v2.inc"
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp16_streamk_v3.cpp
View file @
afa241a8
...
@@ -45,4 +45,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
...
@@ -45,4 +45,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_streamk_v2.inc"
#include "run_gemm_example_streamk_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_universal_streamk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_universal_streamk_example
(
argc
,
argv
))
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp16_v2.cpp
View file @
afa241a8
...
@@ -59,4 +59,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -59,4 +59,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp16_v3.cpp
View file @
afa241a8
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc"
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_fp64.cpp
View file @
afa241a8
...
@@ -54,4 +54,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -54,4 +54,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp8.cpp
View file @
afa241a8
...
@@ -53,4 +53,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -53,4 +53,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp8_bf8.cpp
View file @
afa241a8
...
@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_fp8_v3.cpp
View file @
afa241a8
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
...
@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc"
#include "run_gemm_example_v2.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_splitk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_splitk_example
(
argc
,
argv
)
)
?
-
1
:
0
;
}
example/01_gemm/gemm_xdl_int8.cpp
View file @
afa241a8
...
@@ -46,4 +46,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -46,4 +46,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp
View file @
afa241a8
...
@@ -66,4 +66,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -66,4 +66,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp
View file @
afa241a8
...
@@ -65,4 +65,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -65,4 +65,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_streamk.cpp
View file @
afa241a8
...
@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_streamk_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_streamk_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/gemm_xdl_wavelet_fp16.cpp
View file @
afa241a8
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
...
@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc"
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
(
run_gemm_example
(
argc
,
argv
)
?
-
1
:
0
)
;
}
example/01_gemm/run_gemm_example_streamk_v2.inc
View file @
afa241a8
...
@@ -243,6 +243,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -243,6 +243,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool
pass
=
true
;
bool
pass
=
true
;
if
(
config
.
do_verification
)
if
(
config
.
do_verification
)
{
{
std
::
cout
<<
"Compute reference GEMM on CPU... "
;
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
...
@@ -250,8 +251,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -250,8 +251,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
std
::
cout
<<
"Compute GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
#ifdef BUILD_INT4_EXAMPLE
#ifdef BUILD_INT4_EXAMPLE
Tensor
<
CDataType
>
c_m_n_device_result_converted
(
c_m_n_host_result
.
mDesc
);
Tensor
<
CDataType
>
c_m_n_device_result_converted
(
c_m_n_host_result
.
mDesc
);
...
@@ -263,16 +267,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -263,16 +267,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else
#else
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
pass
&=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
pass
=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
get_atol
<
CDataType
>
());
if
(
pass
)
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
#endif
#endif
}
}
if
(
config
.
time_kernel
)
if
(
config
.
time_kernel
)
{
{
std
::
cout
<<
"Time GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
std
::
size_t
flop
=
2_
uz
*
M
*
N
*
K
;
std
::
size_t
flop
=
2_
uz
*
M
*
N
*
K
;
...
@@ -286,7 +293,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -286,7 +293,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
}
}
return
pass
;
return
!
pass
;
}
}
bool
run_gemm_universal_streamk_example
(
int
argc
,
char
*
argv
[])
bool
run_gemm_universal_streamk_example
(
int
argc
,
char
*
argv
[])
...
...
example/01_gemm/run_gemm_example_v2.inc
View file @
afa241a8
...
@@ -230,6 +230,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -230,6 +230,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool
pass
=
true
;
bool
pass
=
true
;
if
(
config
.
do_verification
)
if
(
config
.
do_verification
)
{
{
std
::
cout
<<
"Compute reference GEMM on CPU... "
;
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
...
@@ -237,8 +238,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -237,8 +238,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
std
::
cout
<<
"Compute GEMM on device...
\n
"
;
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
#ifdef BUILD_INT4_EXAMPLE
#ifdef BUILD_INT4_EXAMPLE
Tensor
<
CDataType
>
c_m_n_device_result_converted
(
c_m_n_host_result
.
mDesc
);
Tensor
<
CDataType
>
c_m_n_device_result_converted
(
c_m_n_host_result
.
mDesc
);
...
@@ -250,16 +254,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -250,16 +254,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else
#else
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
pass
&=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
pass
=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
get_atol
<
CDataType
>
());
if
(
pass
)
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
#endif
#endif
}
}
if
(
config
.
time_kernel
)
if
(
config
.
time_kernel
)
{
{
std
::
cout
<<
"Time GEMM on device...
\n
"
;
ave_time
=
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
,
0
,
5
,
10
,
true
,
4
});
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
,
0
,
5
,
10
,
true
,
4
});
...
@@ -274,7 +281,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -274,7 +281,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
}
}
return
pass
;
return
!
pass
;
}
}
bool
run_gemm_splitk_example
(
int
argc
,
char
*
argv
[])
bool
run_gemm_splitk_example
(
int
argc
,
char
*
argv
[])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment