Commit b1a7d2a7 authored by Andriy Roshchenko
Browse files

Improve GEMM example verbosity.

parent 10b0d214
...@@ -65,6 +65,16 @@ ...@@ -65,6 +65,16 @@
"CMAKE_BUILD_TYPE": "Release", "CMAKE_BUILD_TYPE": "Release",
"CMAKE_CXX_FLAGS": "-O3" "CMAKE_CXX_FLAGS": "-O3"
} }
},
{
"name": "MI250-debug",
"displayName": "MI250 Debug",
"inherits": "linux-debug",
"cacheVariables": {
"GPU_TARGETS": "gfx90a",
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_CXX_FLAGS": "-O0 -ggdb"
}
} }
], ],
"buildPresets": [ "buildPresets": [
...@@ -117,6 +127,16 @@ ...@@ -117,6 +127,16 @@
"Release" "Release"
], ],
"jobs": 128 "jobs": 128
},
{
"name": "MI250-debug",
"displayName": "MI250",
"configurePreset": "MI250-debug",
"description": "Build Environment for MI250 Debug.",
"inherits": [
"Debug"
],
"jobs": 128
} }
] ]
} }
...@@ -317,16 +317,23 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -317,16 +317,23 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
return true; return true;
} }
std::size_t flop = 2_uz * M * N * K; if(config.time_kernel)
std::size_t num_btype = {
sizeof(ADataType) * M * K + sizeof(BDataType) * K * N + sizeof(CDataType) * M * N; std::size_t flop = 2_uz * M * N * K;
std::size_t num_btype =
sizeof(ADataType) * M * K + sizeof(BDataType) * K * N + sizeof(CDataType) * M * N;
float tflops = static_cast<float>(flop) / 1.E9 / ave_time; float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
float gb_per_sec = num_btype / 1.E6 / ave_time; float gb_per_sec = num_btype / 1.E6 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, " std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< gemm.GetTypeString() << std::endl; << " GB/s, " << gemm.GetTypeString() << std::endl;
}
else
{
std::cout << "FINISHED: " << gemm.GetTypeString() << std::endl;
}
bool pass = true; bool pass = true;
...@@ -353,12 +360,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -353,12 +360,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else #else
c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data()); c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
pass &= !ck::utils::check_err(c_m_n_device_result, pass = ck::utils::check_err(c_m_n_device_result,
c_m_n_host_result, c_m_n_host_result,
"Error: Incorrect results!", "Error: Incorrect results!",
get_rtol<CDataType>(), get_rtol<CDataType>(),
get_atol<CDataType>()); get_atol<CDataType>());
#endif #endif
if(pass)
std::cout << "Verification on CPU: PASS" << std::endl;
// GPU verification // GPU verification
auto ref_gemm_gpu = ReferenceGemmInstanceGPU{}; auto ref_gemm_gpu = ReferenceGemmInstanceGPU{};
...@@ -381,11 +390,13 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -381,11 +390,13 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
c_m_n_device_ref_buf.FromDevice(c_m_n_device_ref_result.mData.data()); c_m_n_device_ref_buf.FromDevice(c_m_n_device_ref_result.mData.data());
c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data()); c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
pass &= !ck::utils::check_err(c_m_n_device_result, pass = ck::utils::check_err(c_m_n_device_result,
c_m_n_device_ref_result, c_m_n_device_ref_result,
"Error: Incorrect results!", "Error: Incorrect results!",
get_rtol<CDataType>(), get_rtol<CDataType>(),
get_atol<CDataType>()); get_atol<CDataType>());
if(pass)
std::cout << "Verification on GPU: PASS" << std::endl;
} }
return !pass; return !pass;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment