Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5394ac66
"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "a054f7d604d3bfee9e4ad410df15397bc354ae3d"
Commit
5394ac66
authored
Aug 30, 2023
by
Bartlomiej Kocot
Browse files
Several fixes of image to column profiler
parent
5c704e2d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
23 deletions
+8
-23
example/52_image_to_column/image_to_column_f32.cpp
example/52_image_to_column/image_to_column_f32.cpp
+3
-4
profiler/include/profiler/profile_image_to_column_impl.hpp
profiler/include/profiler/profile_image_to_column_impl.hpp
+5
-19
No files found.
example/52_image_to_column/image_to_column_f32.cpp
View file @
5394ac66
...
@@ -106,10 +106,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
...
@@ -106,10 +106,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
return
false
;
return
false
;
}
}
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
std
::
size_t
num_btype
=
std
::
size_t
num_btype
=
2
*
NDoHoWo
*
CZYX
*
sizeof
(
OutDataType
);
NDoHoWo
*
CZYX
*
sizeof
(
OutDataType
)
+
conv_params
.
GetInputByte
<
InputDataType
>
();
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
if
(
config
.
do_verification
)
if
(
config
.
do_verification
)
...
...
profiler/include/profiler/profile_image_to_column_impl.hpp
View file @
5394ac66
...
@@ -7,15 +7,11 @@
...
@@ -7,15 +7,11 @@
#include <iostream>
#include <iostream>
#include <typeinfo>
#include <typeinfo>
#include <limits>
#include <limits>
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/image_to_column.hpp"
#include "ck/library/tensor_operation_instance/gpu/image_to_column.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
@@ -126,9 +122,7 @@ bool profile_image_to_column_impl(int do_verification,
...
@@ -126,9 +122,7 @@ bool profile_image_to_column_impl(int do_verification,
std
::
cout
<<
"found "
<<
op_ptrs
.
size
()
<<
" instances"
<<
std
::
endl
;
std
::
cout
<<
"found "
<<
op_ptrs
.
size
()
<<
" instances"
<<
std
::
endl
;
std
::
string
best_op_name
;
std
::
string
best_op_name
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
// profile device op instances
// profile device op instances
...
@@ -157,19 +151,12 @@ bool profile_image_to_column_impl(int do_verification,
...
@@ -157,19 +151,12 @@ bool profile_image_to_column_impl(int do_verification,
is_supporting_instance
=
true
;
is_supporting_instance
=
true
;
// re-init output to zero before profiling next kernel
// re-init output to zero before profiling next kernel
out_device_buf
.
SetZero
();
out_device_buf
.
SetZero
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
auto
invoker_ptr
=
op_ptr
->
MakeInvokerPointer
();
auto
invoker_ptr
=
op_ptr
->
MakeInvokerPointer
();
float
avg_time
=
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
num_btype
=
2
*
NDoHoWo
*
CZYX
*
sizeof
(
OutputDataType
);
std
::
size_t
num_btype
=
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
conv_param
.
GetInputByte
<
InputDataType
>
()
+
NDoHoWo
*
CZYX
*
sizeof
(
OutputDataType
);
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
std
::
cout
<<
"Perf: "
<<
std
::
setw
(
10
)
<<
avg_time
<<
" ms, "
<<
gb_per_sec
<<
" GB/s, "
std
::
cout
<<
"Perf: "
<<
std
::
setw
(
10
)
<<
avg_time
<<
" ms, "
<<
gb_per_sec
<<
" GB/s, "
<<
op_name
<<
std
::
endl
;
<<
op_name
<<
std
::
endl
;
...
@@ -183,7 +170,6 @@ bool profile_image_to_column_impl(int do_verification,
...
@@ -183,7 +170,6 @@ bool profile_image_to_column_impl(int do_verification,
if
(
do_verification
)
if
(
do_verification
)
{
{
out_device_buf
.
FromDevice
(
device_output
.
mData
.
data
());
out_device_buf
.
FromDevice
(
device_output
.
mData
.
data
());
pass
=
pass
&
ck
::
utils
::
check_err
(
device_output
,
host_output
);
pass
=
pass
&
ck
::
utils
::
check_err
(
device_output
,
host_output
);
if
(
do_log
)
if
(
do_log
)
...
@@ -204,7 +190,7 @@ bool profile_image_to_column_impl(int do_verification,
...
@@ -204,7 +190,7 @@ bool profile_image_to_column_impl(int do_verification,
std
::
cout
<<
"Best configuration parameters:"
std
::
cout
<<
"Best configuration parameters:"
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
name: "
<<
best_op_name
<<
"
\n
avg_time: "
<<
best_avg_time
<<
"
\n
tflops: "
<<
best_tflops
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
<<
"
\n
GB/s: "
<<
best_gb_per_sec
<<
std
::
endl
;
return
is_supporting_instance
&&
pass
;
return
is_supporting_instance
&&
pass
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment