Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
9db34134
"git@developer.sourcefind.cn:change/sglang.git" did not exist on "6d6e24bcc473a8f7ea9d3da178cbc53624bf9814"
Commit
9db34134
authored
Sep 11, 2023
by
Bartlomiej Kocot
Browse files
Fail when no kernel is applicable
parent
8f84a012
Changes
41
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
152 additions
and
34 deletions
+152
-34
profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp
profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp
+1
-2
profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp
.../profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp
...r/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp
profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_impl.hpp
profiler/include/profiler/profile_batched_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp
...ler/include/profiler/profile_batched_gemm_reduce_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp
...clude/profiler/profile_batched_gemm_softmax_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp
...ofiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp
+8
-0
profiler/include/profiler/profile_batchnorm_backward_impl.hpp
...iler/include/profiler/profile_batchnorm_backward_impl.hpp
+8
-8
profiler/include/profiler/profile_batchnorm_forward_impl.hpp
profiler/include/profiler/profile_batchnorm_forward_impl.hpp
+8
-8
profiler/include/profiler/profile_batchnorm_infer_impl.hpp
profiler/include/profiler/profile_batchnorm_infer_impl.hpp
+8
-8
profiler/include/profiler/profile_contraction_impl.hpp
profiler/include/profiler/profile_contraction_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp
.../include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp
...iler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp
+8
-0
profiler/include/profiler/profile_conv_fwd_impl.hpp
profiler/include/profiler/profile_conv_fwd_impl.hpp
+8
-0
profiler/include/profiler/profile_elementwise_layernorm_impl.hpp
...r/include/profiler/profile_elementwise_layernorm_impl.hpp
+7
-8
profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp
...r/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp
profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_add_multiply_impl.hpp
profiler/include/profiler/profile_gemm_add_multiply_impl.hpp
+8
-0
No files found.
profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp
View file @
9db34134
...
@@ -126,6 +126,7 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
...
@@ -126,6 +126,7 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -145,8 +146,6 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
...
@@ -145,8 +146,6 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
ref_invoker
.
Run
(
ref_pooling_bwd_argument
);
ref_invoker
.
Run
(
ref_pooling_bwd_argument
);
}
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
...
profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp
View file @
9db34134
...
@@ -263,6 +263,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
...
@@ -263,6 +263,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -301,6 +302,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
...
@@ -301,6 +302,7 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -350,6 +352,12 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
...
@@ -350,6 +352,12 @@ bool profile_batched_gemm_add_relu_gemm_add_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp
View file @
9db34134
...
@@ -273,6 +273,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
...
@@ -273,6 +273,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -310,6 +311,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
...
@@ -310,6 +311,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -385,6 +387,12 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
...
@@ -385,6 +387,12 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp
View file @
9db34134
...
@@ -223,6 +223,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
...
@@ -223,6 +223,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -255,6 +256,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
...
@@ -255,6 +256,7 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -309,6 +311,12 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
...
@@ -309,6 +311,12 @@ bool profile_batched_gemm_gemm_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_impl.hpp
View file @
9db34134
...
@@ -136,6 +136,7 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -136,6 +136,7 @@ bool profile_batched_gemm_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -201,6 +202,7 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -201,6 +202,7 @@ bool profile_batched_gemm_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
// re-init C to zero before profiling next kernel
c_device_buf
.
SetZero
();
c_device_buf
.
SetZero
();
...
@@ -254,6 +256,12 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -254,6 +256,12 @@ bool profile_batched_gemm_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp
View file @
9db34134
...
@@ -254,6 +254,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
...
@@ -254,6 +254,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device GEMM instances
// profile device GEMM instances
for
(
auto
&
gemm_ptr
:
gemm_ptrs
)
for
(
auto
&
gemm_ptr
:
gemm_ptrs
)
...
@@ -281,6 +282,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
...
@@ -281,6 +282,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// init DO, D1 to 0
// init DO, D1 to 0
reduce0_device_buf
.
SetZero
();
reduce0_device_buf
.
SetZero
();
reduce1_device_buf
.
SetZero
();
reduce1_device_buf
.
SetZero
();
...
@@ -352,6 +354,12 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
...
@@ -352,6 +354,12 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp
View file @
9db34134
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -283,6 +284,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
...
@@ -283,6 +284,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -337,6 +339,12 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
...
@@ -337,6 +339,12 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp
View file @
9db34134
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
...
@@ -251,6 +251,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -284,6 +285,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
...
@@ -284,6 +285,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -357,6 +359,12 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
...
@@ -357,6 +359,12 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_batchnorm_backward_impl.hpp
View file @
9db34134
...
@@ -214,6 +214,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
...
@@ -214,6 +214,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -264,8 +265,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
...
@@ -264,8 +265,7 @@ bool profile_batchnorm_backward_impl(bool do_verification,
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
}
}
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
...
@@ -371,18 +371,18 @@ bool profile_batchnorm_backward_impl(bool do_verification,
...
@@ -371,18 +371,18 @@ bool profile_batchnorm_backward_impl(bool do_verification,
};
};
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
if
(
num_kernel
==
0
)
{
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
return
false
;
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
return
pass
;
return
pass
;
}
}
...
...
profiler/include/profiler/profile_batchnorm_forward_impl.hpp
View file @
9db34134
...
@@ -209,6 +209,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
...
@@ -209,6 +209,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -258,8 +259,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
...
@@ -258,8 +259,7 @@ bool profile_batchnorm_forward_impl(int do_verification,
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
}
}
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
...
@@ -393,18 +393,18 @@ bool profile_batchnorm_forward_impl(int do_verification,
...
@@ -393,18 +393,18 @@ bool profile_batchnorm_forward_impl(int do_verification,
};
};
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
if
(
num_kernel
==
0
)
{
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
return
false
;
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
return
pass
;
return
pass
;
}
}
...
...
profiler/include/profiler/profile_batchnorm_infer_impl.hpp
View file @
9db34134
...
@@ -183,6 +183,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
...
@@ -183,6 +183,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -230,8 +231,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
...
@@ -230,8 +231,7 @@ bool profile_batchnorm_infer_impl(int do_verification,
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
(
void
)
invoker_ptr_ref
->
Run
(
argument_ptr_ref
.
get
());
}
}
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
...
@@ -316,18 +316,18 @@ bool profile_batchnorm_infer_impl(int do_verification,
...
@@ -316,18 +316,18 @@ bool profile_batchnorm_infer_impl(int do_verification,
};
};
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
if
(
num_kernel
==
0
)
{
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
return
false
;
}
}
if
(
time_kernel
)
{
std
::
cout
<<
"best perf = "
<<
best_avg_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
return
pass
;
return
pass
;
}
}
...
...
profiler/include/profiler/profile_contraction_impl.hpp
View file @
9db34134
...
@@ -183,6 +183,7 @@ int profile_contraction_impl(ck::index_t do_verification,
...
@@ -183,6 +183,7 @@ int profile_contraction_impl(ck::index_t do_verification,
float
best_avg_time
=
0
;
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -239,6 +240,7 @@ int profile_contraction_impl(ck::index_t do_verification,
...
@@ -239,6 +240,7 @@ int profile_contraction_impl(ck::index_t do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
// re-init C to zero before profiling next kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -333,6 +335,12 @@ int profile_contraction_impl(ck::index_t do_verification,
...
@@ -333,6 +335,12 @@ int profile_contraction_impl(ck::index_t do_verification,
std
::
cout
<<
" CDELayout = ColumnMajor"
;
std
::
cout
<<
" CDELayout = ColumnMajor"
;
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StridesA = "
<<
StridesA
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StridesA = "
<<
StridesA
<<
" StridesB = "
<<
StridesB
<<
" StridesE = "
<<
StridesE
<<
" : "
<<
best_avg_time
<<
" StridesB = "
<<
StridesB
<<
" StridesE = "
<<
StridesE
<<
" : "
<<
best_avg_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
...
...
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
View file @
9db34134
...
@@ -151,6 +151,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
...
@@ -151,6 +151,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
// profile device Conv instances
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -177,6 +178,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
...
@@ -177,6 +178,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// for conv bwd data, some input tensor element are zero, but not written by kernel,
// for conv bwd data, some input tensor element are zero, but not written by kernel,
// need to set zero
// need to set zero
in_device_buf
.
SetZero
();
in_device_buf
.
SetZero
();
...
@@ -237,6 +239,12 @@ bool profile_conv_bwd_data_impl(int do_verification,
...
@@ -237,6 +239,12 @@ bool profile_conv_bwd_data_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp
View file @
9db34134
...
@@ -192,6 +192,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
...
@@ -192,6 +192,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
// profile device Conv instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -220,6 +221,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
...
@@ -220,6 +221,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -270,6 +272,12 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
...
@@ -270,6 +272,12 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
}
}
...
...
profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp
View file @
9db34134
...
@@ -182,6 +182,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
...
@@ -182,6 +182,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
// profile device Conv instances
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
...
@@ -209,6 +210,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
...
@@ -209,6 +210,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
std
::
string
conv_name
=
op_ptr
->
GetTypeString
();
float
ave_time
=
float
ave_time
=
...
@@ -258,6 +260,12 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
...
@@ -258,6 +260,12 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_conv_name
<<
std
::
endl
;
}
}
...
...
profiler/include/profiler/profile_conv_fwd_impl.hpp
View file @
9db34134
...
@@ -133,6 +133,7 @@ bool profile_conv_fwd_impl(int do_verification,
...
@@ -133,6 +133,7 @@ bool profile_conv_fwd_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
// profile device op instances
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -159,6 +160,7 @@ bool profile_conv_fwd_impl(int do_verification,
...
@@ -159,6 +160,7 @@ bool profile_conv_fwd_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init output to zero before profiling next kernel
// re-init output to zero before profiling next kernel
out_device_buf
.
SetZero
();
out_device_buf
.
SetZero
();
...
@@ -210,6 +212,12 @@ bool profile_conv_fwd_impl(int do_verification,
...
@@ -210,6 +212,12 @@ bool profile_conv_fwd_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_elementwise_layernorm_impl.hpp
View file @
9db34134
...
@@ -137,6 +137,7 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -137,6 +137,7 @@ bool profile_elementwise_layernorm_impl(int do_verification,
std
::
string
best_instance_name
;
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
@@ -163,8 +164,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -163,8 +164,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
}
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
@@ -246,6 +245,12 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -246,6 +245,12 @@ bool profile_elementwise_layernorm_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
if
(
time_kernel
)
{
{
LogRange
(
std
::
cout
<<
"length = "
,
length
,
","
)
<<
", "
;
LogRange
(
std
::
cout
<<
"length = "
,
length
,
","
)
<<
", "
;
...
@@ -253,12 +258,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
...
@@ -253,12 +258,6 @@ bool profile_elementwise_layernorm_impl(int do_verification,
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_instance_name
<<
std
::
endl
;
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is tested"
<<
std
::
endl
;
return
false
;
}
return
true
;
return
true
;
}
}
...
...
profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp
View file @
9db34134
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp
View file @
9db34134
...
@@ -155,6 +155,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
...
@@ -155,6 +155,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -183,6 +184,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
...
@@ -183,6 +184,7 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -222,6 +224,12 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
...
@@ -222,6 +224,12 @@ bool profile_gemm_add_fastgelu_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_add_multiply_impl.hpp
View file @
9db34134
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
...
@@ -164,6 +164,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
bool
pass
=
true
;
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
...
@@ -193,6 +194,7 @@ bool profile_gemm_add_multiply_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
e_device_buf
.
SetZero
();
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_multiply_impl(int do_verification,
...
@@ -232,6 +234,12 @@ bool profile_gemm_add_multiply_impl(int do_verification,
}
}
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment