Commit afa241a8 authored by Andriy Roshchenko
Browse files

Improve GEMM example verbosity.

parent 807a4818
...@@ -49,4 +49,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -49,4 +49,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host:: ...@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc" #include "run_gemm_example_v2.inc"
int main(int argc, char* argv[]) { return !run_gemm_splitk_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_splitk_example(argc, argv)) ? -1 : 0; }
...@@ -60,4 +60,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -60,4 +60,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -55,4 +55,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -55,4 +55,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -50,4 +50,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp ...@@ -50,4 +50,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
#include "run_gemm_example_v2.inc" #include "run_gemm_example_v2.inc"
int main(int argc, char* argv[]) { return !run_gemm_splitk_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_splitk_example(argc, argv)) ? -1 : 0; }
...@@ -45,4 +45,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host:: ...@@ -45,4 +45,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_streamk_v2.inc" #include "run_gemm_example_streamk_v2.inc"
int main(int argc, char* argv[]) { return !run_gemm_universal_streamk_example(argc, argv); } int main(int argc, char* argv[])
{
return (run_gemm_universal_streamk_example(argc, argv)) ? -1 : 0;
}
...@@ -59,4 +59,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -59,4 +59,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host:: ...@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc" #include "run_gemm_example_v2.inc"
int main(int argc, char* argv[]) { return !run_gemm_splitk_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_splitk_example(argc, argv)) ? -1 : 0; }
...@@ -54,4 +54,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -54,4 +54,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -53,4 +53,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -53,4 +53,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host:: ...@@ -45,4 +45,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
#include "run_gemm_example_v2.inc" #include "run_gemm_example_v2.inc"
int main(int argc, char* argv[]) { return !run_gemm_splitk_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_splitk_example(argc, argv)) ? -1 : 0; }
...@@ -46,4 +46,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -46,4 +46,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -66,4 +66,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -66,4 +66,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -65,4 +65,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -65,4 +65,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -57,4 +57,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_streamk_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_streamk_example(argc, argv) ? -1 : 0); }
...@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa ...@@ -50,4 +50,4 @@ using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALa
#include "run_gemm_example.inc" #include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); } int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? -1 : 0); }
...@@ -243,6 +243,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -243,6 +243,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool pass = true; bool pass = true;
if(config.do_verification) if(config.do_verification)
{ {
std::cout << "Compute reference GEMM on CPU... ";
auto ref_gemm = ReferenceGemmInstance{}; auto ref_gemm = ReferenceGemmInstance{};
auto ref_invoker = ref_gemm.MakeInvoker(); auto ref_invoker = ref_gemm.MakeInvoker();
...@@ -250,8 +251,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -250,8 +251,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k, b_k_n, c_m_n_host_result, PassThrough{}, PassThrough{}, PassThrough{}); a_m_k, b_k_n, c_m_n_host_result, PassThrough{}, PassThrough{}, PassThrough{});
ref_invoker.Run(ref_argument); ref_invoker.Run(ref_argument);
std::cout << "DONE!" << std::endl;
std::cout << "Compute GEMM on device... \n";
ave_time = invoker.Run(argument, StreamConfig{nullptr, false, 1}); ave_time = invoker.Run(argument, StreamConfig{nullptr, false, 1});
std::cout << "DONE!" << std::endl;
#ifdef BUILD_INT4_EXAMPLE #ifdef BUILD_INT4_EXAMPLE
Tensor<CDataType> c_m_n_device_result_converted(c_m_n_host_result.mDesc); Tensor<CDataType> c_m_n_device_result_converted(c_m_n_host_result.mDesc);
...@@ -263,16 +267,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -263,16 +267,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else #else
c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data()); c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
pass &= ck::utils::check_err(c_m_n_device_result, pass = ck::utils::check_err(c_m_n_device_result,
c_m_n_host_result, c_m_n_host_result,
"Error: Incorrect results!", "Error: Incorrect results!",
get_rtol<CDataType>(), get_rtol<CDataType>(),
get_atol<CDataType>()); get_atol<CDataType>());
if(pass)
std::cout << "Verification on CPU: PASS" << std::endl;
#endif #endif
} }
if(config.time_kernel) if(config.time_kernel)
{ {
std::cout << "Time GEMM on device... \n";
ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel}); ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
std::size_t flop = 2_uz * M * N * K; std::size_t flop = 2_uz * M * N * K;
...@@ -286,7 +293,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -286,7 +293,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< " GB/s, " << gemm.GetTypeString() << std::endl; << " GB/s, " << gemm.GetTypeString() << std::endl;
} }
return pass; return !pass;
} }
bool run_gemm_universal_streamk_example(int argc, char* argv[]) bool run_gemm_universal_streamk_example(int argc, char* argv[])
......
...@@ -230,6 +230,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -230,6 +230,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool pass = true; bool pass = true;
if(config.do_verification) if(config.do_verification)
{ {
std::cout << "Compute reference GEMM on CPU... ";
auto ref_gemm = ReferenceGemmInstance{}; auto ref_gemm = ReferenceGemmInstance{};
auto ref_invoker = ref_gemm.MakeInvoker(); auto ref_invoker = ref_gemm.MakeInvoker();
...@@ -237,8 +238,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -237,8 +238,11 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k, b_k_n, c_m_n_host_result, PassThrough{}, PassThrough{}, PassThrough{}); a_m_k, b_k_n, c_m_n_host_result, PassThrough{}, PassThrough{}, PassThrough{});
ref_invoker.Run(ref_argument); ref_invoker.Run(ref_argument);
std::cout << "DONE!" << std::endl;
std::cout << "Compute GEMM on device... \n";
ave_time = invoker.Run(argument, StreamConfig{nullptr, false, 1}); ave_time = invoker.Run(argument, StreamConfig{nullptr, false, 1});
std::cout << "DONE!" << std::endl;
#ifdef BUILD_INT4_EXAMPLE #ifdef BUILD_INT4_EXAMPLE
Tensor<CDataType> c_m_n_device_result_converted(c_m_n_host_result.mDesc); Tensor<CDataType> c_m_n_device_result_converted(c_m_n_host_result.mDesc);
...@@ -250,16 +254,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -250,16 +254,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else #else
c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data()); c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
pass &= ck::utils::check_err(c_m_n_device_result, pass = ck::utils::check_err(c_m_n_device_result,
c_m_n_host_result, c_m_n_host_result,
"Error: Incorrect results!", "Error: Incorrect results!",
get_rtol<CDataType>(), get_rtol<CDataType>(),
get_atol<CDataType>()); get_atol<CDataType>());
if(pass)
std::cout << "Verification on CPU: PASS" << std::endl;
#endif #endif
} }
if(config.time_kernel) if(config.time_kernel)
{ {
std::cout << "Time GEMM on device... \n";
ave_time = ave_time =
invoker.Run(argument, StreamConfig{nullptr, config.time_kernel, 0, 5, 10, true, 4}); invoker.Run(argument, StreamConfig{nullptr, config.time_kernel, 0, 5, 10, true, 4});
...@@ -274,7 +281,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -274,7 +281,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< " GB/s, " << gemm.GetTypeString() << std::endl; << " GB/s, " << gemm.GetTypeString() << std::endl;
} }
return pass; return !pass;
} }
bool run_gemm_splitk_example(int argc, char* argv[]) bool run_gemm_splitk_example(int argc, char* argv[])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment