Commit 9db34134 authored by Bartlomiej Kocot
Browse files

Fail when no kernel is applicable

parent 8f84a012
......@@ -126,6 +126,7 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
std::string best_instance_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
int num_kernel = 0;
if(do_verification)
{
......@@ -145,8 +146,6 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
ref_invoker.Run(ref_pooling_bwd_argument);
}
int num_kernel = 0;
for(auto& inst_ptr : instance_ptrs)
{
auto argument_ptr = inst_ptr->MakeArgumentPointer(
......
......@@ -263,6 +263,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -301,6 +302,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string op_name = op_ptr->GetTypeString();
float ave_time =
......@@ -350,6 +352,12 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -273,6 +273,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -310,6 +311,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string op_name = op_ptr->GetTypeString();
float ave_time =
......@@ -385,6 +387,12 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -223,6 +223,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -255,6 +256,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string op_name = op_ptr->GetTypeString();
float ave_time =
......@@ -309,6 +311,12 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -136,6 +136,7 @@ bool profile_batched_gemm_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -201,6 +202,7 @@ bool profile_batched_gemm_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// re-init C to zero before profiling next kernel
c_device_buf.SetZero();
......@@ -254,6 +256,12 @@ bool profile_batched_gemm_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -254,6 +254,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device GEMM instances
for(auto& gemm_ptr : gemm_ptrs)
......@@ -281,6 +282,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// init DO, D1 to 0
reduce0_device_buf.SetZero();
reduce1_device_buf.SetZero();
......@@ -352,6 +354,12 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_gemm_name << std::endl;
......
......@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -283,6 +284,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string op_name = op_ptr->GetTypeString();
float ave_time =
......@@ -337,6 +339,12 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -284,6 +285,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string op_name = op_ptr->GetTypeString();
float ave_time =
......@@ -357,6 +359,12 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -214,6 +214,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
std::string best_instance_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
int num_kernel = 0;
if(do_verification)
{
......@@ -264,7 +265,6 @@ bool profile_batchnorm_backward_impl(bool do_verification,
(void)invoker_ptr_ref->Run(argument_ptr_ref.get());
}
int num_kernel = 0;
bool pass = true;
for(auto& inst_ptr : instance_ptrs)
......@@ -371,18 +371,18 @@ bool profile_batchnorm_backward_impl(bool do_verification,
};
}
if(time_kernel)
{
std::cout << "best perf = " << best_avg_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_instance_name << std::endl;
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(time_kernel)
{
std::cout << "best perf = " << best_avg_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_instance_name << std::endl;
}
return pass;
}
......
......@@ -209,6 +209,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
std::string best_instance_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
int num_kernel = 0;
if(do_verification)
{
......@@ -258,7 +259,6 @@ bool profile_batchnorm_forward_impl(int do_verification,
(void)invoker_ptr_ref->Run(argument_ptr_ref.get());
}
int num_kernel = 0;
bool pass = true;
for(auto& inst_ptr : instance_ptrs)
......@@ -393,18 +393,18 @@ bool profile_batchnorm_forward_impl(int do_verification,
};
}
if(time_kernel)
{
std::cout << "best perf = " << best_avg_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_instance_name << std::endl;
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(time_kernel)
{
std::cout << "best perf = " << best_avg_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_instance_name << std::endl;
}
return pass;
}
......
......@@ -183,6 +183,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
std::string best_instance_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
int num_kernel = 0;
if(do_verification)
{
......@@ -230,7 +231,6 @@ bool profile_batchnorm_infer_impl(int do_verification,
(void)invoker_ptr_ref->Run(argument_ptr_ref.get());
}
int num_kernel = 0;
bool pass = true;
for(auto& inst_ptr : instance_ptrs)
......@@ -316,18 +316,18 @@ bool profile_batchnorm_infer_impl(int do_verification,
};
}
if(time_kernel)
{
std::cout << "best perf = " << best_avg_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_instance_name << std::endl;
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(time_kernel)
{
std::cout << "best perf = " << best_avg_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_instance_name << std::endl;
}
return pass;
}
......
......@@ -183,6 +183,7 @@ int profile_contraction_impl(ck::index_t do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
......@@ -239,6 +240,7 @@ int profile_contraction_impl(ck::index_t do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// re-init C to zero before profiling next kernel
e_device_buf.SetZero();
......@@ -333,6 +335,12 @@ int profile_contraction_impl(ck::index_t do_verification,
std::cout << " CDELayout = ColumnMajor";
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << " M = " << M << " N = " << N << " K = " << K << " StridesA = " << StridesA
<< " StridesB = " << StridesB << " StridesE = " << StridesE << " : " << best_avg_time
<< " ms, " << best_tflops << " TFlops, " << best_gb_per_sec << " GB/s, "
......
......@@ -151,6 +151,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device Conv instances
bool pass = true;
......@@ -177,6 +178,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// for conv bwd data, some input tensor element are zero, but not written by kernel,
// need to set zero
in_device_buf.SetZero();
......@@ -237,6 +239,12 @@ bool profile_conv_bwd_data_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best configuration parameters:"
<< "\nname: " << best_op_name << "\navg_time: " << best_avg_time
<< "\ntflops: " << best_tflops << "\nGB/s: " << best_gb_per_sec << std::endl;
......
......@@ -192,6 +192,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device Conv instances
for(auto& op_ptr : op_ptrs)
......@@ -220,6 +221,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string conv_name = op_ptr->GetTypeString();
float ave_time =
......@@ -270,6 +272,12 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_conv_name << std::endl;
}
......
......@@ -182,6 +182,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device Conv instances
for(auto& op_ptr : op_ptrs)
......@@ -209,6 +210,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
std::string conv_name = op_ptr->GetTypeString();
float ave_time =
......@@ -258,6 +260,12 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_conv_name << std::endl;
}
......
......@@ -133,6 +133,7 @@ bool profile_conv_fwd_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
bool pass = true;
......@@ -159,6 +160,7 @@ bool profile_conv_fwd_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
......@@ -210,6 +212,12 @@ bool profile_conv_fwd_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best configuration parameters:"
<< "\nname: " << best_op_name << "\navg_time: " << best_avg_time
<< "\ntflops: " << best_tflops << "\nGB/s: " << best_gb_per_sec << std::endl;
......
......@@ -137,6 +137,7 @@ bool profile_elementwise_layernorm_impl(int do_verification,
std::string best_instance_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
int num_kernel = 0;
if(do_verification)
{
......@@ -163,8 +164,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
ref_invoker.Run(ref_argument);
}
int num_kernel = 0;
for(auto& inst_ptr : instance_ptrs)
{
auto argument_ptr = inst_ptr->MakeArgumentPointer(
......@@ -246,6 +245,12 @@ bool profile_elementwise_layernorm_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(time_kernel)
{
LogRange(std::cout << "length = ", length, ",") << ", ";
......@@ -253,12 +258,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
<< best_gb_per_sec << " GB/s, " << best_instance_name << std::endl;
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is tested" << std::endl;
return false;
}
return true;
}
......
......@@ -164,6 +164,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
bool pass = true;
......@@ -193,6 +194,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// re-init E to zero before profiling a kernel
e_device_buf.SetZero();
......@@ -232,6 +234,12 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -155,6 +155,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
bool pass = true;
......@@ -183,6 +184,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// re-init E to zero before profiling a kernel
e_device_buf.SetZero();
......@@ -222,6 +224,12 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
......@@ -164,6 +164,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
bool pass = true;
......@@ -193,6 +194,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
// re-init E to zero before profiling a kernel
e_device_buf.SetZero();
......@@ -232,6 +234,12 @@ bool profile_gemm_add_multiply_impl(int do_verification,
}
}
if(num_kernel == 0)
{
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment