Commit 24771ab7 authored by Andriy Roshchenko
Browse files

Optionally run either CPU or GPU verification with GEMM examples.

parent 02958ba5
......@@ -75,9 +75,10 @@ struct ProblemSizeSplitK final
// Run-time options for the GEMM examples: which reference verification to run,
// how the inputs are initialized, and whether the kernel is timed.
// NOTE(review): this span is a diff rendering without +/- markers, so the
// pre-change members (bool do_verification, init_method, time_kernel) and the
// post-change members appear together; taken as plain C++ the repeated member
// names would not compile. Confirm which set is intended before reusing it.
struct ExecutionConfig final
{
bool do_verification = true;
int init_method = 2;
bool time_kernel = false;
// Verification mode: 0 - no verification, 1 - CPU, 2 - GPU, 3 - CPU + GPU.
// The default (3) runs both the CPU and the GPU reference checks.
int do_verification = 3;
// Initialization mode: 0 - no init, 1 - integer value, 2 - decimal value.
int init_method = 2;
// Kernel timing switch (command-line arg3: 0 = no, 1 = yes).
bool time_kernel = false;
};
template <ck::index_t... Is>
......@@ -126,7 +127,7 @@ bool parse_cmd_args<ProblemSize>(int argc,
}
else
{
std::cerr << "arg1: verification (0=no, 1=CPU and GPU)" << std::endl
std::cerr << "arg1: verification (0=no, 1=CPU, 2=GPU, 3=CPU and GPU)" << std::endl
<< "arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<< std::endl
<< "arg3: time kernel (0=no, 1=yes)" << std::endl
......@@ -176,7 +177,7 @@ bool parse_cmd_args<ProblemSizeStreamK_universal>(int argc,
else
{
std::cerr
<< "arg1: verification (0=no, 1=CPU and GPU)" << std::endl
<< "arg1: verification (0=no, 1=CPU, 2=GPU, 3=CPU and GPU)" << std::endl
<< "arg2: initialization (0=no init, 1=integer value, 2=decimal value)" << std::endl
<< "arg3: time kernel (0=no, 1=yes)" << std::endl
<< "arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC" << std::endl
......@@ -225,7 +226,7 @@ bool parse_cmd_args<ProblemSizeStreamK>(int argc,
}
else
{
std::cerr << "arg1: verification (0=no, 1=CPU and GPU)" << std::endl
std::cerr << "arg1: verification (0=no, 1=CPU, 2=GPU, 3=CPU and GPU)" << std::endl
<< "arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<< std::endl
<< "arg3: time kernel (0=no, 1=yes)" << std::endl
......@@ -275,7 +276,7 @@ bool parse_cmd_args<ProblemSizeSplitK>(int argc,
}
else
{
std::cerr << "arg1: verification (0=no, 1=CPU and GPU)" << std::endl
std::cerr << "arg1: verification (0=no, 1=CPU, 2=GPU, 3=CPU and GPU)" << std::endl
<< "arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<< std::endl
<< "arg3: time kernel (0=no, 1=yes)" << std::endl
......
......@@ -337,7 +337,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
bool pass = true;
if(config.do_verification)
if((config.do_verification == 1) || (config.do_verification == 3))
{
// CPU verification
auto ref_gemm = ReferenceGemmInstance{};
......@@ -368,7 +368,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#endif
if(pass)
std::cout << "Verification on CPU: PASS" << std::endl;
}
if((config.do_verification == 2) || (config.do_verification == 3))
{
// GPU verification
auto ref_gemm_gpu = ReferenceGemmInstanceGPU{};
auto ref_invoker_gpu = ref_gemm_gpu.MakeInvoker();
......
......@@ -241,7 +241,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
}
bool pass = true;
if(config.do_verification)
if((config.do_verification == 1) || (config.do_verification == 3))
{
std::cout << "Compute reference GEMM on CPU... ";
auto ref_gemm = ReferenceGemmInstance{};
......
......@@ -228,7 +228,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
}
bool pass = true;
if(config.do_verification)
if((config.do_verification == 1) || (config.do_verification == 3))
{
std::cout << "Compute reference GEMM on CPU... ";
auto ref_gemm = ReferenceGemmInstance{};
......
......@@ -76,7 +76,7 @@ __global__ void
// apply b_element_op
b_element_op(v_b, p_b_grid[element_idx_b]);
// multiply and accumulate
v_acc += static_cast<AccDataType>(v_a) * static_cast<AccDataType>(v_b);
v_acc += type_convert<AccDataType>(v_a) * type_convert<AccDataType>(v_b);
}
// apply c_element_op
c_element_op(v_c, v_acc);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment