Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
9db34134
Commit
9db34134
authored
Sep 11, 2023
by
Bartlomiej Kocot
Browse files
Fail when no kernel is applicable
parent
8f84a012
Changes
41
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
152 additions
and
34 deletions
+152
-34
profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp
profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp
+1
-2
profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp
.../profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp
...r/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp
profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_impl.hpp
profiler/include/profiler/profile_batched_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp
...ler/include/profiler/profile_batched_gemm_reduce_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp
...clude/profiler/profile_batched_gemm_softmax_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp
...ofiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp
+8
-0
profiler/include/profiler/profile_batchnorm_backward_impl.hpp
...iler/include/profiler/profile_batchnorm_backward_impl.hpp
+8
-8
profiler/include/profiler/profile_batchnorm_forward_impl.hpp
profiler/include/profiler/profile_batchnorm_forward_impl.hpp
+8
-8
profiler/include/profiler/profile_batchnorm_infer_impl.hpp
profiler/include/profiler/profile_batchnorm_infer_impl.hpp
+8
-8
profiler/include/profiler/profile_contraction_impl.hpp
profiler/include/profiler/profile_contraction_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp
.../include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp
...iler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_fwd_impl.hpp
profiler/include/profiler/profile_conv_fwd_impl.hpp
+8
-0
profiler/include/profiler/profile_elementwise_layernorm_impl.hpp
...r/include/profiler/profile_elementwise_layernorm_impl.hpp
+7
-8
profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp
...r/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp
profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_add_multiply_impl.hpp
profiler/include/profiler/profile_gemm_add_multiply_impl.hpp
+8
-0
No files found.
profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp
View file @
9db34134
...
@@ -126,6 +126,7 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
...
@@ -126,6 +126,7 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -145,8 +146,6 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
...
@@ -145,8 +146,6 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
ref_invoker
.
Run
(
ref_pooling_bwd_argument
);
ref_invoker
.
Run
(
ref_pooling_bwd_argument
);
}
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
...
profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp
View file @
9db34134
...
@@ -263,6 +263,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
...
@@ -263,6 +263,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -301,6 +302,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
...
@@ -301,6 +302,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -350,6 +352,12 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
...
@@ -350,6 +352,12 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp
View file @
9db34134
...
@@ -273,6 +273,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
...
@@ -273,6 +273,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -310,6 +311,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
...
@@ -310,6 +311,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -385,6 +387,12 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
...
@@ -385,6 +387,12 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp
View file @
9db34134
...
@@ -223,6 +223,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
...
@@ -223,6 +223,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -255,6 +256,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
...
@@ -255,6 +256,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -309,6 +311,12 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
...
@@ -309,6 +311,12 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_impl.hpp
View file @
9db34134
...
@@ -136,6 +136,7 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -136,6 +136,7 @@ bool profile_batched_gemm_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -201,6 +202,7 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -201,6 +202,7 @@ bool profile_batched_gemm_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
// re-init C to zero before profiling next kernel
c_device_buf
.
SetZero
();
c_device_buf
.
SetZero
();
...
@@ -254,6 +256,12 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -254,6 +256,12 @@ bool profile_batched_gemm_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp
View file @
9db34134
...
@@ -254,6 +254,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
...
@@ -254,6 +254,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device GEMM instances
// profile device GEMM instances
for
(
auto
&
gemm_ptr
:
gemm_ptrs
)
for
(
auto
&
gemm_ptr
:
gemm_ptrs
)
...
@@ -281,6 +282,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
...
@@ -281,6 +282,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// init DO, D1 to 0
// init DO, D1 to 0
reduce0_device_buf
.
SetZero
();
reduce0_device_buf
.
SetZero
();
reduce1_device_buf
.
SetZero
();
reduce1_device_buf
.
SetZero
();
...
@@ -352,6 +354,12 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
...
@@ -352,6 +354,12 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp
View file @
9db34134
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -283,6 +284,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
...
@@ -283,6 +284,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -337,6 +339,12 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
...
@@ -337,6 +339,12 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp
View file @
9db34134
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -284,6 +285,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
...
@@ -284,6 +285,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -357,6 +359,12 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
...
@@ -357,6 +359,12 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batchnorm_backward_impl.hpp
View file @
9db34134
...
@@ -214,6 +214,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
...
@@ -214,6 +214,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -264,7 +265,6 @@ bool profile_batchnorm_backward_impl(bool do_verification,
...
@@ -264,7 +265,6 @@ bool profile_batchnorm_backward_impl(bool do_verification,
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
}
}
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
...
@@ -371,18 +371,18 @@ bool profile_batchnorm_backward_impl(bool do_verification,
...
@@ -371,18 +371,18 @@ bool profile_batchnorm_backward_impl(bool do_verification,
};
};
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
if
(
num_kernel
==
0
)
{
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
return
false
;
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
return
pass
;
return
pass
;
}
}
...
...
profiler/include/profiler/profile_batchnorm_forward_impl.hpp
View file @
9db34134
...
@@ -209,6 +209,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
...
@@ -209,6 +209,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -258,7 +259,6 @@ bool profile_batchnorm_forward_impl(int do_verification,
...
@@ -258,7 +259,6 @@ bool profile_batchnorm_forward_impl(int do_verification,
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
}
}
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
...
@@ -393,18 +393,18 @@ bool profile_batchnorm_forward_impl(int do_verification,
...
@@ -393,18 +393,18 @@ bool profile_batchnorm_forward_impl(int do_verification,
};
};
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
if
(
num_kernel
==
0
)
{
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
return
false
;
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
return
pass
;
return
pass
;
}
}
...
...
profiler/include/profiler/profile_batchnorm_infer_impl.hpp
View file @
9db34134
...
@@ -183,6 +183,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
...
@@ -183,6 +183,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -230,7 +231,6 @@ bool profile_batchnorm_infer_impl(int do_verification,
...
@@ -230,7 +231,6 @@ bool profile_batchnorm_infer_impl(int do_verification,
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
}
}
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
...
@@ -316,18 +316,18 @@ bool profile_batchnorm_infer_impl(int do_verification,
...
@@ -316,18 +316,18 @@ bool profile_batchnorm_infer_impl(int do_verification,
};
};
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
if
(
num_kernel
==
0
)
{
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
return
false
;
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
return
pass
;
return
pass
;
}
}
...
...
profiler/include/profiler/profile_contraction_impl.hpp
View file @
9db34134
...
@@ -183,6 +183,7 @@ int profile_contraction_impl(ck::index_t do_verification,
...
@@ -183,6 +183,7 @@ int profile_contraction_impl(ck::index_t do_verification,
float
best_avg_time
=
0
;
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -239,6 +240,7 @@ int profile_contraction_impl(ck::index_t do_verification,
...
@@ -239,6 +240,7 @@ int profile_contraction_impl(ck::index_t do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
// re-init C to zero before profiling next kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -333,6 +335,12 @@ int profile_contraction_impl(ck::index_t do_verification,
...
@@ -333,6 +335,12 @@ int profile_contraction_impl(ck::index_t do_verification,
std
::
cout
<<
" CDELayout = ColumnMajor"
;
std
::
cout
<<
" CDELayout = ColumnMajor"
;
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StridesA = "
<<
StridesA
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StridesA = "
<<
StridesA
<<
" StridesB = "
<<
StridesB
<<
" StridesE = "
<<
StridesE
<<
" : "
<<
best_avg_time
<<
" StridesB = "
<<
StridesB
<<
" StridesE = "
<<
StridesE
<<
" : "
<<
best_avg_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
...
...
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
View file @
9db34134
...
@@ -151,6 +151,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
...
@@ -151,6 +151,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
// profile device Conv instances
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -177,6 +178,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
...
@@ -177,6 +178,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// for conv bwd data, some input tensor element are zero, but not written by kernel,
// for conv bwd data, some input tensor element are zero, but not written by kernel,
// need to set zero
// need to set zero
in_device_buf
.
SetZero
();
in_device_buf
.
SetZero
();
...
@@ -237,6 +239,12 @@ bool profile_conv_bwd_data_impl(int do_verification,
...
@@ -237,6 +239,12 @@ bool profile_conv_bwd_data_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp
View file @
9db34134
...
@@ -192,6 +192,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
...
@@ -192,6 +192,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
// profile device Conv instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -220,6 +221,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
...
@@ -220,6 +221,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -270,6 +272,12 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
...
@@ -270,6 +272,12 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
}
}
...
...
profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp
View file @
9db34134
...
@@ -182,6 +182,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
...
@@ -182,6 +182,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
// profile device Conv instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -209,6 +210,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
...
@@ -209,6 +210,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -258,6 +260,12 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
...
@@ -258,6 +260,12 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
}
}
...
...
profiler/include/profiler/profile_conv_fwd_impl.hpp
View file @
9db34134
...
@@ -133,6 +133,7 @@ bool profile_conv_fwd_impl(int do_verification,
...
@@ -133,6 +133,7 @@ bool profile_conv_fwd_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -159,6 +160,7 @@ bool profile_conv_fwd_impl(int do_verification,
...
@@ -159,6 +160,7 @@ bool profile_conv_fwd_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init output to zero before profiling next kernel
// re-init output to zero before profiling next kernel
out_device_buf
.
SetZero
();
out_device_buf
.
SetZero
();
...
@@ -210,6 +212,12 @@ bool profile_conv_fwd_impl(int do_verification,
...
@@ -210,6 +212,12 @@ bool profile_conv_fwd_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_elementwise_layernorm_impl.hpp
View file @
9db34134
...
@@ -137,6 +137,7 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -137,6 +137,7 @@ bool profile_elementwise_layernorm_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -163,8 +164,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -163,8 +164,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
}
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
@@ -246,6 +245,12 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -246,6 +245,12 @@ bool profile_elementwise_layernorm_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
if
(
time_kernel
)
{
{
LogRange
(
std
::
cout
<<
"length = "
,
length
,
","
)
<<
", "
;
LogRange
(
std
::
cout
<<
"length = "
,
length
,
","
)
<<
", "
;
...
@@ -253,12 +258,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -253,12 +258,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is tested"
<<
std
::
endl
;
return
false
;
}
return
true
;
return
true
;
}
}
...
...
profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp
View file @
9db34134
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp
View file @
9db34134
...
@@ -155,6 +155,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
...
@@ -155,6 +155,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -183,6 +184,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
...
@@ -183,6 +184,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -222,6 +224,12 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
...
@@ -222,6 +224,12 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_add_multiply_impl.hpp
View file @
9db34134
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_multiply_impl(int do_verification,
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_multiply_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment