Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
df6f43d8
Commit
df6f43d8
authored
Jan 29, 2022
by
Jing Zhang
Browse files
clean code
parent
25751e37
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
79 additions
and
58 deletions
+79
-58
profiler/CMakeLists.txt
profiler/CMakeLists.txt
+8
-8
profiler/include/profile_gemm_impl.hpp
profiler/include/profile_gemm_impl.hpp
+67
-44
profiler/profiler.cpp
profiler/profiler.cpp
+4
-6
No files found.
profiler/CMakeLists.txt
View file @
df6f43d8
...
...
@@ -84,16 +84,16 @@ install(TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINAT
set
(
PROFILER_SOURCE
profiler.cpp
profile_gemm.cpp
#
profile_conv_fwd.cpp
#
profile_conv_fwd_bias_relu.cpp
#
profile_conv_fwd_bias_relu_add.cpp
#
profile_conv_fwd_bias_relu_atomic_add.cpp
profile_conv_fwd.cpp
profile_conv_fwd_bias_relu.cpp
profile_conv_fwd_bias_relu_add.cpp
profile_conv_fwd_bias_relu_atomic_add.cpp
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
target_link_libraries
(
ckProfiler PRIVATE host_tensor
)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance
)
profiler/include/profile_gemm_impl.hpp
View file @
df6f43d8
...
...
@@ -178,80 +178,103 @@ void profile_gemm_impl(int do_verification,
// add device GEMM instances
std
::
vector
<
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
DeviceGemmNoOpPtr
>
gemm_ptrs
;
if
(
KBatch
>
1
&&
is_same
<
ADataType
,
float
>::
value
)
if
constexpr
(
is_same
<
ADataType
,
float
>::
value
&&
is_same
<
BDataType
,
float
>::
value
&&
is_same
<
CDataType
,
float
>::
value
)
{
// ck::tensor_operation::device::device_gemm_instance::
// add_device_splitk_gemm_instance<float, float, float, ALayout, BLayout, CLayout>(
// gemm_ptrs);
}
else
{
if
(
is_same
<
ADataType
,
float
>::
value
&&
is_same
<
BDataType
,
float
>::
value
&&
is_same
<
CDataType
,
float
>::
value
)
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
(
KBatch
>
1
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_
splitk_
f32_f32_f32_mk_kn_mn_instances
(
gemm_ptrs
);
}
else
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
else
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f32_f32_f32_mk_
n
k_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_f32_f32_f32_mk_k
n
_mn_instances
(
gemm_ptrs
);
}
else
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
}
else
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
if
(
KBatch
>
1
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f32_f32_f32_
km_kn
_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_
splitk_
f32_f32_f32_
mk_nk
_mn_instances
(
gemm_ptrs
);
}
else
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
else
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f32_f32_f32_
k
m_nk_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_f32_f32_f32_m
k
_nk_mn_instances
(
gemm_ptrs
);
}
}
else
if
(
is_same
<
ADataType
,
half_t
>::
value
&&
is_same
<
BDataType
,
half_t
>::
value
&&
is_same
<
CDataType
,
half_t
>::
value
)
else
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
(
KBatch
>
1
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_
f16_f16_f16_mk
_kn_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_
splitk_f32_f32_f32_km
_kn_mn_instances
(
gemm_ptrs
);
}
else
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
else
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f
16_f16_f16_mk_nk
_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_f
32_f32_f32_km_kn
_mn_instances
(
gemm_ptrs
);
}
else
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
}
else
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
if
(
KBatch
>
1
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_
f16_f16_f16
_km_
k
n_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_
splitk_f32_f32_f32
_km_n
k
_mn_instances
(
gemm_ptrs
);
}
else
if
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
else
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f
16_f16_f16
_km_nk_mn_instances
(
gemm_ptrs
);
add_device_gemm_xdl_f
32_f32_f32
_km_nk_mn_instances
(
gemm_ptrs
);
}
}
}
else
if
constexpr
(
is_same
<
ADataType
,
half_t
>::
value
&&
is_same
<
BDataType
,
half_t
>::
value
&&
is_same
<
CDataType
,
half_t
>::
value
)
{
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances
(
gemm_ptrs
);
}
else
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances
(
gemm_ptrs
);
}
else
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances
(
gemm_ptrs
);
}
else
if
constexpr
(
is_same
<
ALayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
BLayout
,
tensor_layout
::
gemm
::
ColumnMajor
>::
value
&&
is_same
<
CLayout
,
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
ck
::
tensor_operation
::
device
::
device_gemm_instance
::
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances
(
gemm_ptrs
);
}
}
if
(
gemm_ptrs
.
size
()
<=
0
)
{
...
...
profiler/profiler.cpp
View file @
df6f43d8
...
...
@@ -6,10 +6,10 @@
#include <half.hpp>
int
profile_gemm
(
int
,
char
*
[]);
//
int profile_conv_fwd(int, char*[]);
//
int profile_conv_fwd_bias_relu(int, char*[]);
//
int profile_conv_fwd_bias_relu_add(int, char*[]);
//
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
int
profile_conv_fwd
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_add
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_atomic_add
(
int
,
char
*
[]);
int
main
(
int
argc
,
char
*
argv
[])
{
...
...
@@ -17,7 +17,6 @@ int main(int argc, char* argv[])
{
return
profile_gemm
(
argc
,
argv
);
}
#if 0
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd"
)
==
0
)
{
return
profile_conv_fwd
(
argc
,
argv
);
...
...
@@ -34,7 +33,6 @@ int main(int argc, char* argv[])
{
return
profile_conv_fwd_bias_relu_atomic_add
(
argc
,
argv
);
}
#endif
else
{
printf
(
"arg1: tensor operation (gemm: GEMM;
\n
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment