Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
9db34134
Commit
9db34134
authored
Sep 11, 2023
by
Bartlomiej Kocot
Browse files
Fail when no kernel is applicable
parent
8f84a012
Changes
41
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
151 additions
and
48 deletions
+151
-48
profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp
...ude/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp
+7
-9
profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp
...er/include/profiler/profile_gemm_bias_add_reduce_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_bilinear_impl.hpp
profiler/include/profiler/profile_gemm_bilinear_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_fastgelu_impl.hpp
profiler/include/profiler/profile_gemm_fastgelu_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_impl.hpp
profiler/include/profiler/profile_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_multiply_add_impl.hpp
profiler/include/profiler/profile_gemm_multiply_add_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_reduce_impl.hpp
profiler/include/profiler/profile_gemm_reduce_impl.hpp
+8
-0
profiler/include/profiler/profile_gemm_splitk_impl.hpp
profiler/include/profiler/profile_gemm_splitk_impl.hpp
+8
-1
profiler/include/profiler/profile_gemm_streamk_impl.hpp
profiler/include/profiler/profile_gemm_streamk_impl.hpp
+9
-1
profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp
...r/include/profiler/profile_grouped_conv_bwd_data_impl.hpp
+8
-0
profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp
...include/profiler/profile_grouped_conv_bwd_weight_impl.hpp
+8
-0
profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
+8
-0
profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp
...r/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp
+8
-0
profiler/include/profiler/profile_grouped_gemm_impl.hpp
profiler/include/profiler/profile_grouped_gemm_impl.hpp
+8
-0
profiler/include/profiler/profile_groupnorm_impl.hpp
profiler/include/profiler/profile_groupnorm_impl.hpp
+7
-8
profiler/include/profiler/profile_image_to_column_impl.hpp
profiler/include/profiler/profile_image_to_column_impl.hpp
+10
-4
profiler/include/profiler/profile_layernorm_impl.hpp
profiler/include/profiler/profile_layernorm_impl.hpp
+7
-8
profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp
profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp
+7
-8
profiler/include/profiler/profile_pool3d_fwd_impl.hpp
profiler/include/profiler/profile_pool3d_fwd_impl.hpp
+7
-8
profiler/include/profiler/profile_reduce_impl.hpp
profiler/include/profiler/profile_reduce_impl.hpp
+1
-1
No files found.
profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp
View file @
9db34134
...
...
@@ -249,9 +249,9 @@ bool profile_gemm_add_relu_add_layernorm_impl(int do_verification,
std
::
string
best_op_name
;
float
best_ave_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
int
num_kernel
=
0
;
// profile device operation instances
for
(
auto
&
op_ptr
:
op_ptrs
)
...
...
@@ -283,7 +283,6 @@ bool profile_gemm_add_relu_add_layernorm_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
++
num_kernel
;
size_t
workspace_sz
=
op_ptr
->
GetWorkSpaceSize
(
argument_ptr
.
get
());
DeviceMem
workspace_dev
(
workspace_sz
);
op_ptr
->
SetWorkSpacePointer
(
argument_ptr
.
get
(),
workspace_dev
.
GetDeviceBuffer
());
...
...
@@ -330,15 +329,14 @@ bool profile_gemm_add_relu_add_layernorm_impl(int do_verification,
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
pass
=
false
;
return
false
;
}
else
{
if
(
time_kernel
)
{
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
}
return
pass
;
}
...
...
profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp
View file @
9db34134
...
...
@@ -280,6 +280,7 @@ void profile_gemm_bias_add_reduce_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device GEMM instances
for
(
auto
&
gemm_ptr
:
gemm_ptrs
)
...
...
@@ -306,6 +307,7 @@ void profile_gemm_bias_add_reduce_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// init DO, D1 to 0
reduce0_device_buf
.
SetZero
();
reduce1_device_buf
.
SetZero
();
...
...
@@ -376,6 +378,12 @@ void profile_gemm_bias_add_reduce_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
}
...
...
profiler/include/profiler/profile_gemm_bilinear_impl.hpp
View file @
9db34134
...
...
@@ -157,6 +157,7 @@ bool profile_gemm_bilinear_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
...
...
@@ -185,6 +186,7 @@ bool profile_gemm_bilinear_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
...
...
@@ -224,6 +226,12 @@ bool profile_gemm_bilinear_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_fastgelu_impl.hpp
View file @
9db34134
...
...
@@ -146,6 +146,7 @@ bool profile_gemm_fastgelu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
...
...
@@ -173,6 +174,7 @@ bool profile_gemm_fastgelu_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
...
...
@@ -212,6 +214,12 @@ bool profile_gemm_fastgelu_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_impl.hpp
View file @
9db34134
...
...
@@ -134,6 +134,7 @@ int profile_gemm_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
for
(
auto
&
op_ptr
:
op_ptrs
)
...
...
@@ -156,6 +157,7 @@ int profile_gemm_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
c_device_buf
.
SetZero
();
...
...
@@ -242,6 +244,12 @@ int profile_gemm_impl(int do_verification,
std
::
cout
<<
" BLayout = ColumnMajor"
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StrideA = "
<<
StrideA
<<
" StrideB = "
<<
StrideB
<<
" StrideC = "
<<
StrideC
<<
" : "
<<
best_avg_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
...
...
profiler/include/profiler/profile_gemm_multiply_add_impl.hpp
View file @
9db34134
...
...
@@ -164,6 +164,7 @@ bool profile_gemm_multiply_add_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
bool
pass
=
true
;
...
...
@@ -193,6 +194,7 @@ bool profile_gemm_multiply_add_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init E to zero before profiling a kernel
e_device_buf
.
SetZero
();
...
...
@@ -232,6 +234,12 @@ bool profile_gemm_multiply_add_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_reduce_impl.hpp
View file @
9db34134
...
...
@@ -249,6 +249,7 @@ bool profile_gemm_reduce_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device GEMM instances
for
(
auto
&
gemm_ptr
:
gemm_ptrs
)
...
...
@@ -275,6 +276,7 @@ bool profile_gemm_reduce_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// init DO, D1 to 0
reduce0_device_buf
.
SetZero
();
reduce1_device_buf
.
SetZero
();
...
...
@@ -343,6 +345,12 @@ bool profile_gemm_reduce_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_gemm_splitk_impl.hpp
View file @
9db34134
...
...
@@ -136,6 +136,7 @@ bool profile_gemm_splitk_impl(int do_verification,
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_kbatch
=
0
;
int
num_kernel
=
0
;
// profile device GEMM instances
for
(
auto
&
op_ptr
:
op_ptrs
)
...
...
@@ -171,7 +172,7 @@ bool profile_gemm_splitk_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
c_device_buf
.
SetZero
();
...
...
@@ -281,6 +282,12 @@ bool profile_gemm_splitk_impl(int do_verification,
std
::
cout
<<
" BLayout = ColumnMajor"
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StrideA = "
<<
StrideA
<<
" StrideB = "
<<
StrideB
<<
" StrideC = "
<<
StrideC
<<
" KBatch = "
<<
best_kbatch
<<
" : "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
...
...
profiler/include/profiler/profile_gemm_streamk_impl.hpp
View file @
9db34134
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -137,6 +137,7 @@ bool profile_gemm_streamk_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device GEMM instances
for
(
auto
&
op_ptr
:
op_ptrs
)
...
...
@@ -167,6 +168,7 @@ bool profile_gemm_streamk_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init C to zero before profiling next kernel
c_device_buf
.
SetZero
();
...
...
@@ -255,6 +257,12 @@ bool profile_gemm_streamk_impl(int do_verification,
std
::
cout
<<
" BLayout = ColumnMajor"
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
" M = "
<<
M
<<
" N = "
<<
N
<<
" K = "
<<
K
<<
" StrideA = "
<<
StrideA
<<
" StrideB = "
<<
StrideB
<<
" StrideC = "
<<
StrideC
<<
" : "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
...
...
profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp
View file @
9db34134
...
...
@@ -120,6 +120,7 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
bool
pass
=
true
;
...
...
@@ -127,6 +128,7 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
auto
run_impl
=
[
&
](
auto
&
op_ptr
,
auto
&
argument_ptr
)
{
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init output to zero before profiling next kernel
in_device_buf
.
SetZero
();
...
...
@@ -246,6 +248,12 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
run_impl
(
op_ptr
,
argument_ptr
);
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp
View file @
9db34134
...
...
@@ -132,6 +132,7 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device Conv instances
bool
all_pass
=
true
;
...
...
@@ -183,6 +184,7 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// using atomic add, so need to reset input
wei_device_buf
.
SetZero
();
...
...
@@ -246,6 +248,12 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
View file @
9db34134
...
...
@@ -140,6 +140,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
float
best_avg_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
bool
pass
=
true
;
...
...
@@ -147,6 +148,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
auto
run_impl
=
[
&
](
auto
&
op_ptr
,
auto
&
argument_ptr
)
{
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
// re-init output to zero before profiling next kernel
out_device_buf
.
SetZero
();
...
...
@@ -242,6 +244,12 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
run_impl
(
op_ptr
,
argument_ptr
);
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp
View file @
9db34134
...
...
@@ -166,6 +166,7 @@ bool profile_grouped_gemm_fastgelu_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
auto
p_ds
=
std
::
vector
<
std
::
array
<
const
void
*
,
0
>>
{};
...
...
@@ -181,6 +182,7 @@ bool profile_grouped_gemm_fastgelu_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
float
ave_time
=
...
...
@@ -270,6 +272,12 @@ bool profile_grouped_gemm_fastgelu_impl(int do_verification,
std
::
cout
<<
"Verification: "
<<
(
pass
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
...
...
profiler/include/profiler/profile_grouped_gemm_impl.hpp
View file @
9db34134
...
...
@@ -174,6 +174,7 @@ bool profile_grouped_gemm_impl(int do_verification,
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_kbatch
=
0
;
int
num_kernel
=
0
;
auto
p_ds
=
std
::
vector
<
std
::
array
<
const
void
*
,
0
>>
{};
...
...
@@ -258,6 +259,7 @@ bool profile_grouped_gemm_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
num_kernel
++
;
for
(
std
::
size_t
i
=
0
;
i
<
gemm_descs
.
size
();
i
++
)
c_device_buf
[
i
]
->
SetZero
();
...
...
@@ -347,6 +349,12 @@ bool profile_grouped_gemm_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
{
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
...
...
profiler/include/profiler/profile_groupnorm_impl.hpp
View file @
9db34134
...
...
@@ -94,6 +94,7 @@ bool profile_groupnorm_impl(int do_verification,
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
{
...
...
@@ -110,8 +111,6 @@ bool profile_groupnorm_impl(int do_verification,
ref_invoker
.
Run
(
ref_argument
);
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
...
@@ -192,6 +191,12 @@ bool profile_groupnorm_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
{
LogRange
(
std
::
cout
<<
"length = "
,
length
,
","
)
<<
std
::
endl
;
...
...
@@ -199,12 +204,6 @@ bool profile_groupnorm_impl(int do_verification,
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
...
...
profiler/include/profiler/profile_image_to_column_impl.hpp
View file @
9db34134
...
...
@@ -124,10 +124,10 @@ bool profile_image_to_column_impl(int do_verification,
std
::
string
best_op_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
// profile device op instances
bool
pass
=
true
;
bool
is_supporting_instance
=
false
;
for
(
auto
&
op_ptr
:
op_ptrs
)
{
...
...
@@ -148,7 +148,7 @@ bool profile_image_to_column_impl(int do_verification,
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
is_supporting_instance
=
true
;
num_kernel
++
;
// re-init output to zero before profiling next kernel
out_device_buf
.
SetZero
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
...
...
@@ -189,11 +189,17 @@ bool profile_image_to_column_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
return
is_supporting_instance
&&
pass
;
return
pass
;
}
}
// namespace profiler
...
...
profiler/include/profiler/profile_layernorm_impl.hpp
View file @
9db34134
...
...
@@ -102,6 +102,7 @@ bool profile_layernorm_impl(int do_verification,
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
{
...
...
@@ -121,8 +122,6 @@ bool profile_layernorm_impl(int do_verification,
ref_invoker
.
Run
(
ref_argument
);
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
length
,
...
...
@@ -209,6 +208,12 @@ bool profile_layernorm_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
{
LogRange
(
std
::
cout
<<
"length = "
,
length
,
","
)
<<
", "
;
...
...
@@ -218,12 +223,6 @@ bool profile_layernorm_impl(int do_verification,
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
...
...
profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp
View file @
9db34134
...
...
@@ -158,6 +158,7 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
{
...
...
@@ -175,8 +176,6 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
ref_invoker
.
Run
(
ref_pooling_bwd_argument
);
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
...
@@ -268,6 +267,12 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
{
LogRange
(
std
::
cout
<<
"length = "
,
out_length
,
","
)
<<
std
::
endl
;
...
...
@@ -275,12 +280,6 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
...
...
profiler/include/profiler/profile_pool3d_fwd_impl.hpp
View file @
9db34134
...
...
@@ -124,6 +124,7 @@ bool profile_pool3d_fwd_impl(int do_verification,
std
::
string
best_instance_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
int
num_kernel
=
0
;
if
(
do_verification
)
{
...
...
@@ -150,8 +151,6 @@ bool profile_pool3d_fwd_impl(int do_verification,
ref_invoker
.
Run
(
ref_argument
);
}
int
num_kernel
=
0
;
for
(
auto
&
inst_ptr
:
instance_ptrs
)
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
...
...
@@ -260,6 +259,12 @@ bool profile_pool3d_fwd_impl(int do_verification,
}
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
if
(
time_kernel
)
{
LogRange
(
std
::
cout
<<
"length = "
,
in_length
,
","
)
<<
std
::
endl
;
...
...
@@ -267,12 +272,6 @@ bool profile_pool3d_fwd_impl(int do_verification,
<<
best_instance_name
<<
std
::
endl
;
}
if
(
num_kernel
==
0
)
{
std
::
cout
<<
"Error: No kernel is applicable"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
...
...
profiler/include/profiler/profile_reduce_impl.hpp
View file @
9db34134
...
...
@@ -195,8 +195,8 @@ bool profile_reduce_impl_impl(bool do_verification,
constexpr
bool
invalid_reduce
=
(
invalid_reduce_1
||
invalid_reduce_2
||
invalid_reduce_3
||
invalid_reduce_4
||
invalid_reduce_5
||
invalid_reduce_6
);
int
num_kernel
=
0
;
bool
pass
=
true
;
int
num_kernel
=
0
;
if
constexpr
(
!
invalid_reduce
)
{
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment