Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
63ecd2e7
Commit
63ecd2e7
authored
Mar 10, 2022
by
Jianfeng yan
Browse files
add debug code for reduction
parent
7887c0ac
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
81 additions
and
71 deletions
+81
-71
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
...operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
+6
-0
profiler/CMakeLists.txt
profiler/CMakeLists.txt
+20
-20
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+55
-51
No files found.
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
View file @
63ecd2e7
...
@@ -79,6 +79,12 @@ struct ThreadwiseTensorSliceTransfer_v1r3
...
@@ -79,6 +79,12 @@ struct ThreadwiseTensorSliceTransfer_v1r3
{
{
static_assert
(
SrcDesc
::
IsKnownAtCompileTime
(),
static_assert
(
SrcDesc
::
IsKnownAtCompileTime
(),
"wrong! SrcDesc need to known at compile-time"
);
"wrong! SrcDesc need to known at compile-time"
);
constexpr
index_t
slice_size
=
reduce_on_sequence
(
SliceLengths
{},
math
::
multiplies
{},
Number
<
1
>
{});
if
constexpr
(
0
!=
slice_size
%
DstScalarPerVector
)
{
printf
(
"%c
\n
"
,
SliceLengths
{});
}
static_assert
(
0
==
slice_size
%
DstScalarPerVector
);
}
}
__device__
void
SetDstSliceOrigin
(
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin_idx
)
__device__
void
SetDstSliceOrigin
(
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin_idx
)
...
...
profiler/CMakeLists.txt
View file @
63ecd2e7
...
@@ -22,30 +22,30 @@ include_directories(BEFORE
...
@@ -22,30 +22,30 @@ include_directories(BEFORE
# ck_profiler
# ck_profiler
set
(
PROFILER_SOURCE
set
(
PROFILER_SOURCE
src/profiler.cpp
src/profiler.cpp
src/profile_gemm.cpp
#
src/profile_gemm.cpp
src/profile_gemm_bias_2d.cpp
#
src/profile_gemm_bias_2d.cpp
src/profile_gemm_bias_relu.cpp
#
src/profile_gemm_bias_relu.cpp
src/profile_gemm_bias_relu_add.cpp
#
src/profile_gemm_bias_relu_add.cpp
src/profile_batched_gemm.cpp
#
src/profile_batched_gemm.cpp
src/profile_conv_fwd.cpp
#
src/profile_conv_fwd.cpp
src/profile_conv_fwd_bias_relu.cpp
#
src/profile_conv_fwd_bias_relu.cpp
src/profile_conv_fwd_bias_relu_add.cpp
#
src/profile_conv_fwd_bias_relu_add.cpp
src/profile_conv_fwd_bias_relu_atomic_add.cpp
#
src/profile_conv_fwd_bias_relu_atomic_add.cpp
src/profile_conv_bwd_data.cpp
#
src/profile_conv_bwd_data.cpp
src/profile_reduce.cpp
src/profile_reduce.cpp
)
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
target_link_libraries
(
ckProfiler PRIVATE host_tensor
)
target_link_libraries
(
ckProfiler PRIVATE host_tensor
)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias2d_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias2d_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias_relu_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias_relu_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_batched_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_batched_gemm_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_bwd_data_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance)
target_link_libraries
(
ckProfiler PRIVATE device_reduce_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_reduce_instance
)
profiler/src/profiler.cpp
View file @
63ecd2e7
...
@@ -4,61 +4,65 @@
...
@@ -4,61 +4,65 @@
#include <cstdlib>
#include <cstdlib>
#include <cstring>
#include <cstring>
int
profile_gemm
(
int
,
char
*
[]);
//
int profile_gemm(int, char*[]);
int
profile_batched_gemm
(
int
,
char
*
[]);
//
int profile_batched_gemm(int, char*[]);
int
profile_gemm_bias_2d
(
int
,
char
*
[]);
//
int profile_gemm_bias_2d(int, char*[]);
int
profile_gemm_bias_relu
(
int
,
char
*
[]);
//
int profile_gemm_bias_relu(int, char*[]);
int
profile_gemm_bias_relu_add
(
int
,
char
*
[]);
//
int profile_gemm_bias_relu_add(int, char*[]);
int
profile_conv_fwd
(
int
,
char
*
[]);
//
int profile_conv_fwd(int, char*[]);
int
profile_conv_fwd_bias_relu
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu(int, char*[]);
int
profile_conv_fwd_bias_relu_add
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu_add(int, char*[]);
int
profile_conv_fwd_bias_relu_atomic_add
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
int
profile_conv_bwd_data
(
int
,
char
*
[]);
//
int profile_conv_bwd_data(int, char*[]);
int
profile_reduce
(
int
,
char
*
[]);
int
profile_reduce
(
int
,
char
*
[]);
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
if
(
strcmp
(
argv
[
1
],
"gemm"
)
==
0
)
// if(strcmp(argv[1], "gemm") == 0)
{
// {
return
profile_gemm
(
argc
,
argv
);
// return profile_gemm(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"gemm_bias_2d"
)
==
0
)
// else if(strcmp(argv[1], "gemm_bias_2d") == 0)
{
// {
return
profile_gemm_bias_2d
(
argc
,
argv
);
// return profile_gemm_bias_2d(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"gemm_bias_relu"
)
==
0
)
// else if(strcmp(argv[1], "gemm_bias_relu") == 0)
{
// {
return
profile_gemm_bias_relu
(
argc
,
argv
);
// return profile_gemm_bias_relu(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"gemm_bias_relu_add"
)
==
0
)
// else if(strcmp(argv[1], "gemm_bias_relu_add") == 0)
{
// {
return
profile_gemm_bias_relu_add
(
argc
,
argv
);
// return profile_gemm_bias_relu_add(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"batched_gemm"
)
==
0
)
// else if(strcmp(argv[1], "batched_gemm") == 0)
{
// {
return
profile_batched_gemm
(
argc
,
argv
);
// return profile_batched_gemm(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd"
)
==
0
)
// else if(strcmp(argv[1], "conv_fwd") == 0)
{
// {
return
profile_conv_fwd
(
argc
,
argv
);
// return profile_conv_fwd(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu"
)
==
0
)
// else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0)
{
// {
return
profile_conv_fwd_bias_relu
(
argc
,
argv
);
// return profile_conv_fwd_bias_relu(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu_add"
)
==
0
)
// else if(strcmp(argv[1], "conv_fwd_bias_relu_add") == 0)
{
// {
return
profile_conv_fwd_bias_relu_add
(
argc
,
argv
);
// return profile_conv_fwd_bias_relu_add(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu_atomic_add"
)
==
0
)
// else if(strcmp(argv[1], "conv_fwd_bias_relu_atomic_add") == 0)
{
// {
return
profile_conv_fwd_bias_relu_atomic_add
(
argc
,
argv
);
// return profile_conv_fwd_bias_relu_atomic_add(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"conv_bwd"
)
==
0
)
// else if(strcmp(argv[1], "conv_bwd") == 0)
{
// {
return
profile_conv_bwd_data
(
argc
,
argv
);
// return profile_conv_bwd_data(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"reduce"
)
==
0
)
// else if(strcmp(argv[1], "reduce") == 0)
// {
// return profile_reduce(argc, argv);
// }
if
(
strcmp
(
argv
[
1
],
"reduce"
)
==
0
)
{
{
return
profile_reduce
(
argc
,
argv
);
return
profile_reduce
(
argc
,
argv
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment