Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
27876602
Commit
27876602
authored
Nov 18, 2022
by
Po-Yen, Chen
Browse files
Use macro to eliminate redundant code
parent
f744c531
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
103 additions
and
68 deletions
+103
-68
profiler/src/profile_batched_gemm.cpp
profiler/src/profile_batched_gemm.cpp
+5
-2
profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp
profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp
+5
-5
profiler/src/profile_batched_gemm_gemm.cpp
profiler/src/profile_batched_gemm_gemm.cpp
+5
-2
profiler/src/profile_batched_gemm_reduce.cpp
profiler/src/profile_batched_gemm_reduce.cpp
+5
-4
profiler/src/profile_conv_bwd_data.cpp
profiler/src/profile_conv_bwd_data.cpp
+5
-2
profiler/src/profile_conv_fwd.cpp
profiler/src/profile_conv_fwd.cpp
+5
-2
profiler/src/profile_conv_fwd_bias_relu.cpp
profiler/src/profile_conv_fwd_bias_relu.cpp
+5
-4
profiler/src/profile_conv_fwd_bias_relu_add.cpp
profiler/src/profile_conv_fwd_bias_relu_add.cpp
+5
-5
profiler/src/profile_gemm.cpp
profiler/src/profile_gemm.cpp
+5
-2
profiler/src/profile_gemm_add_add_fastgelu.cpp
profiler/src/profile_gemm_add_add_fastgelu.cpp
+5
-4
profiler/src/profile_gemm_bias_add_reduce.cpp
profiler/src/profile_gemm_bias_add_reduce.cpp
+5
-4
profiler/src/profile_gemm_bilinear.cpp
profiler/src/profile_gemm_bilinear.cpp
+5
-2
profiler/src/profile_gemm_reduce.cpp
profiler/src/profile_gemm_reduce.cpp
+5
-2
profiler/src/profile_gemm_splitk.cpp
profiler/src/profile_gemm_splitk.cpp
+5
-2
profiler/src/profile_grouped_conv_bwd_weight.cpp
profiler/src/profile_grouped_conv_bwd_weight.cpp
+18
-18
profiler/src/profile_grouped_conv_fwd.cpp
profiler/src/profile_grouped_conv_fwd.cpp
+5
-4
profiler/src/profile_grouped_gemm.cpp
profiler/src/profile_grouped_gemm.cpp
+5
-2
profiler/src/profile_groupnorm.cpp
profiler/src/profile_groupnorm.cpp
+5
-2
No files found.
profiler/src/profile_batched_gemm.cpp
View file @
27876602
...
...
@@ -26,12 +26,15 @@ enum struct GemmDataType
INT8_INT8_INT8
,
// 3
};
#define OP_NAME "batched_gemm"
#define OP_DESC "Batched GEMM"
int
profile_batched_gemm
(
int
argc
,
char
*
argv
[])
{
if
(
argc
!=
18
)
{
// clang-format off
printf
(
"arg1: tensor operation (
batched_gemm: Batched GEMM
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)
\n
"
);
printf
(
"arg3: matrix layout (0: A[g, m, k] * B[g, k, n] = C[g, m, n];
\n
"
);
printf
(
" 1: A[g, m, k] * B[g, n, k] = C[g, m, n];
\n
"
);
...
...
@@ -197,4 +200,4 @@ int profile_batched_gemm(int argc, char* argv[])
}
}
REGISTER_PROFILER_OPERATION
(
"batched_gemm"
,
"Batched GEMM"
,
profile_batched_gemm
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_batched_gemm
);
profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp
View file @
27876602
...
...
@@ -15,6 +15,9 @@ using F32 = float;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
#define OP_NAME "batched_gemm_add_relu_gemm_add"
#define OP_DESC "Batched GEMM+Add+Relu+GEMM+Add"
int
profile_batched_gemm_add_relu_gemm_add
(
int
argc
,
char
*
argv
[])
{
enum
struct
GemmMatrixLayout
...
...
@@ -110,8 +113,7 @@ int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[])
}
else
{
printf
(
"arg1: tensor operation (batched_gemm_add_relu_gemm_add: "
"Batched GEMM+Add+Relu+GEMM+Add)
\n
"
);
printf
(
"arg1: tensor operation ("
OP_NAME
": "
OP_DESC
")
\n
"
);
printf
(
"arg2: data type (1: fp16)
\n
"
);
printf
(
"arg3: matrix layout (0: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[n, o] + D1[m, o] "
"= E1[m, o]; 1: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[o, n] + D1[m, o] = "
...
...
@@ -209,6 +211,4 @@ int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"batched_gemm_add_relu_gemm_add"
,
"Batched GEMM+Add+Relu+GEMM+Add"
,
profile_batched_gemm_add_relu_gemm_add
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_batched_gemm_add_relu_gemm_add
);
profiler/src/profile_batched_gemm_gemm.cpp
View file @
27876602
...
...
@@ -15,6 +15,9 @@ using F32 = float;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
#define OP_NAME "batched_gemm_gemm"
#define OP_DESC "Batched GEMM+GEMM"
int
profile_batched_gemm_gemm
(
int
argc
,
char
*
argv
[])
{
enum
struct
GemmMatrixLayout
...
...
@@ -102,7 +105,7 @@ int profile_batched_gemm_gemm(int argc, char* argv[])
}
else
{
printf
(
"arg1: tensor operation (
batched_gemm_gemm: Batched GEMM+GEMM
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (1: fp16)
\n
"
);
printf
(
"arg3: matrix layout (0: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[n, o] + D1[m, o] "
"= E1[m, o]; 1: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[o, n] + D1[m, o] = E1[m, "
...
...
@@ -181,4 +184,4 @@ int profile_batched_gemm_gemm(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"batched_gemm_gemm"
,
"Batched GEMM+GEMM"
,
profile_batched_gemm_gemm
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_batched_gemm_gemm
);
profiler/src/profile_batched_gemm_reduce.cpp
View file @
27876602
...
...
@@ -9,6 +9,9 @@
#include "profiler/profile_batched_gemm_reduce_impl.hpp"
#include "profiler_operation_registry.hpp"
#define OP_NAME "batched_gemm_reduce"
#define OP_DESC "Batched GEMM+Reduce"
int
profile_batched_gemm_reduce
(
int
argc
,
char
*
argv
[])
{
enum
struct
GemmMatrixLayout
...
...
@@ -27,7 +30,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
if
(
argc
!=
15
)
{
printf
(
"arg1: tensor operation (
batched_gemm_reduce: Batched GEMM+Reduce
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
...
...
@@ -153,6 +156,4 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"batched_gemm_reduce"
,
"Batched GEMM+Reduce"
,
profile_batched_gemm_reduce
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_batched_gemm_reduce
);
profiler/src/profile_conv_bwd_data.cpp
View file @
27876602
...
...
@@ -25,10 +25,13 @@ enum struct ConvDataType
INT8_INT8_INT8
,
// 3
};
#define OP_NAME "conv_bwd_data"
#define OP_DESC "Convolution Backward Data"
static
void
print_helper_msg
()
{
std
::
cout
<<
"arg1: tensor operation (
conv_bwd_data: Convolution Backward Data
)
\n
"
<<
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
<<
"arg2: data type (0: Input fp32, Weight fp32, Output fp32
\n
"
<<
" 1: Input fp16, Weight fp16, Output fp16
\n
"
<<
" 2: Input bf16, Weight bf16, Output bf16
\n
"
...
...
@@ -184,4 +187,4 @@ int profile_conv_bwd_data(int argc, char* argv[])
return
1
;
}
REGISTER_PROFILER_OPERATION
(
"conv_bwd_data"
,
"Convolution Backward Data"
,
profile_conv_bwd_data
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_conv_bwd_data
);
profiler/src/profile_conv_fwd.cpp
View file @
27876602
...
...
@@ -25,11 +25,14 @@ enum struct ConvDataType
INT8_INT8_INT8
,
// 3
};
#define OP_NAME "conv_fwd"
#define OP_DESC "Convolution Forward"
static
void
print_helper_msg
()
{
std
::
cout
// clang-format-off
<<
"arg1: tensor operation (
conv_fwd: Convolution Forward
)
\n
"
<<
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
<<
"arg2: data type (0: Input fp32, Weight fp32, Output fp32
\n
"
<<
" 1: Input fp16, Weight fp16, Output fp16
\n
"
<<
" 2: Input bf16, Weight bf16, Output bf16
\n
"
...
...
@@ -186,4 +189,4 @@ int profile_conv_fwd(int argc, char* argv[])
return
1
;
}
REGISTER_PROFILER_OPERATION
(
"conv_fwd"
,
"Convolution Forward"
,
profile_conv_fwd
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_conv_fwd
);
profiler/src/profile_conv_fwd_bias_relu.cpp
View file @
27876602
...
...
@@ -33,11 +33,14 @@ enum struct ConvOutputLayout
NHWK
,
// 1
};
#define OP_NAME "conv_fwd_bias_relu"
#define OP_DESC "Convolution Forward+Bias+ReLU"
int
profile_conv_fwd_bias_relu
(
int
argc
,
char
*
argv
[])
{
if
(
argc
!=
25
)
{
printf
(
"arg1: tensor operation (
conv_fwd_bias_relu: Convolution Forward+Bias+ReLU
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16)
\n
"
);
printf
(
"arg3: input tensor layout (0: NCHW; 1: NHWC)
\n
"
);
printf
(
"arg4: weight tensor layout (0: KCYX; 1: KYXC)
\n
"
);
...
...
@@ -116,6 +119,4 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"conv_fwd_bias_relu"
,
"Convolution Forward+Bias+ReLU"
,
profile_conv_fwd_bias_relu
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_conv_fwd_bias_relu
);
profiler/src/profile_conv_fwd_bias_relu_add.cpp
View file @
27876602
...
...
@@ -33,12 +33,14 @@ enum struct ConvOutputLayout
NHWK
,
// 1
};
#define OP_NAME "conv_fwd_bias_relu_add"
#define OP_DESC "Convolution Forward+Bias+ReLU+Add"
int
profile_conv_fwd_bias_relu_add
(
int
argc
,
char
*
argv
[])
{
if
(
argc
!=
25
)
{
printf
(
"arg1: tensor operation (conv_fwd_bias_relu_add: Convolution Forward+Bias+ReLU+Add)
\n
"
);
printf
(
"arg1: tensor operation ("
OP_NAME
": "
OP_DESC
")
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16)
\n
"
);
printf
(
"arg3: input tensor layout (0: NCHW; 1: NHWC)
\n
"
);
printf
(
"arg4: weight tensor layout (0: KCYX; 1: KYXC)
\n
"
);
...
...
@@ -117,6 +119,4 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"conv_fwd_bias_relu_add"
,
"Convolution Forward+Bias+ReLU+Add"
,
profile_conv_fwd_bias_relu_add
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_conv_fwd_bias_relu_add
);
profiler/src/profile_gemm.cpp
View file @
27876602
...
...
@@ -25,9 +25,12 @@ enum struct GemmDataType
INT8_INT8_INT8
,
// 3
};
#define OP_NAME "gemm"
#define OP_DESC "GEMM"
static
void
print_helper_msg
()
{
std
::
cout
<<
"arg1: tensor operation (
gemm: GEMM
)
\n
"
std
::
cout
<<
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
<<
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
<<
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
<<
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
...
...
@@ -186,4 +189,4 @@ int profile_gemm(int argc, char* argv[])
}
}
REGISTER_PROFILER_OPERATION
(
"gemm"
,
"GEMM"
,
profile_gemm
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_gemm
);
profiler/src/profile_gemm_add_add_fastgelu.cpp
View file @
27876602
...
...
@@ -9,6 +9,9 @@
#include "profiler/profile_gemm_add_add_fastgelu_impl.hpp"
#include "profiler_operation_registry.hpp"
#define OP_NAME "gemm_add_add_fastgelu"
#define OP_DESC "GEMM+Add+Add+FastGeLU"
int
profile_gemm_add_add_fastgelu
(
int
argc
,
char
*
argv
[])
{
enum
struct
MatrixLayout
...
...
@@ -30,7 +33,7 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[])
if
(
argc
!=
16
)
{
// clang-format off
printf
(
"arg1: tensor operation (
gemm_add_add_fastgelu: GEMM+Add+Add+FastGeLU
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
);
printf
(
"arg3: matrix layout (0: E[m, n] = FastGeLU(A[m, k] * B[k, n] + D0[m, n] + D1[m, n]);
\n
"
);
printf
(
" 1: E[m, n] = FastGeLU(A[m, k] * B[n, k] + D0[m, n] + D1[m, n]);
\n
"
);
...
...
@@ -152,6 +155,4 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[])
}
}
REGISTER_PROFILER_OPERATION
(
"gemm_add_add_fastgelu"
,
"GEMM+Add+Add+FastGeLU"
,
profile_gemm_add_add_fastgelu
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_gemm_add_add_fastgelu
);
profiler/src/profile_gemm_bias_add_reduce.cpp
View file @
27876602
...
...
@@ -9,6 +9,9 @@
#include "profiler/profile_gemm_bias_add_reduce_impl.hpp"
#include "profiler_operation_registry.hpp"
#define OP_NAME "gemm_bias_add_reduce"
#define OP_DESC "GEMM+Bias+Add+Reduce"
int
profile_gemm_bias_add_reduce
(
int
argc
,
char
*
argv
[])
{
enum
struct
GemmMatrixLayout
...
...
@@ -27,7 +30,7 @@ int profile_gemm_bias_add_reduce(int argc, char* argv[])
if
(
!
(
argc
==
14
||
argc
==
15
))
{
printf
(
"arg1: tensor operation (
gemm_bias_add_reduce: GEMM+Bias+Add+Reduce
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
...
...
@@ -161,6 +164,4 @@ int profile_gemm_bias_add_reduce(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"gemm_bias_add_reduce"
,
"GEMM+Bias+Add+Reduce"
,
profile_gemm_bias_add_reduce
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_gemm_bias_add_reduce
);
profiler/src/profile_gemm_bilinear.cpp
View file @
27876602
...
...
@@ -9,6 +9,9 @@
#include "profiler/profile_gemm_bilinear_impl.hpp"
#include "profiler_operation_registry.hpp"
#define OP_NAME "gemm_bilinear"
#define OP_DESC "GEMM+Bilinear"
int
profile_gemm_bilinear
(
int
argc
,
char
*
argv
[])
{
enum
struct
MatrixLayout
...
...
@@ -30,7 +33,7 @@ int profile_gemm_bilinear(int argc, char* argv[])
if
(
argc
!=
17
)
{
// clang-format off
printf
(
"arg1: tensor operation (
gemm_bilinear: GEMM+Bilinear
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
);
printf
(
"arg3: matrix layout (0: E[m, n] = alpha * A[m, k] * B[k, n] + beta * D[m, n];
\n
"
);
printf
(
" 1: E[m, n] = alpha * A[m, k] * B[n, k] + beta * D[m, n];
\n
"
);
...
...
@@ -146,4 +149,4 @@ int profile_gemm_bilinear(int argc, char* argv[])
}
}
REGISTER_PROFILER_OPERATION
(
"gemm_bilinear"
,
"GEMM+Bilinear"
,
profile_gemm_bilinear
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_gemm_bilinear
);
profiler/src/profile_gemm_reduce.cpp
View file @
27876602
...
...
@@ -9,6 +9,9 @@
#include "profiler/profile_gemm_reduce_impl.hpp"
#include "profiler_operation_registry.hpp"
#define OP_NAME "gemm_reduce"
#define OP_DESC "GEMM+Reduce"
int
profile_gemm_reduce
(
int
argc
,
char
*
argv
[])
{
enum
struct
GemmMatrixLayout
...
...
@@ -27,7 +30,7 @@ int profile_gemm_reduce(int argc, char* argv[])
if
(
!
(
argc
==
14
||
argc
==
15
))
{
printf
(
"arg1: tensor operation (
gemm_reduce: GEMM+Reduce
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
...
...
@@ -148,4 +151,4 @@ int profile_gemm_reduce(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"gemm_reduce"
,
"GEMM+Reduce"
,
profile_gemm_reduce
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_gemm_reduce
);
profiler/src/profile_gemm_splitk.cpp
View file @
27876602
...
...
@@ -25,11 +25,14 @@ enum struct GemmDataType
INT8_INT8_INT8
,
// 3
};
#define OP_NAME "gemm_splitk"
#define OP_DESC "Split-K GEMM"
int
profile_gemm_splitk
(
int
argc
,
char
*
argv
[])
{
if
(
argc
!=
15
)
{
printf
(
"arg1: tensor operation (
gemm_splitk: Split-K GEMM
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
...
...
@@ -148,4 +151,4 @@ int profile_gemm_splitk(int argc, char* argv[])
}
}
REGISTER_PROFILER_OPERATION
(
"gemm_splitk"
,
"Split-K GEMM"
,
profile_gemm_splitk
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_gemm_splitk
);
profiler/src/profile_grouped_conv_bwd_weight.cpp
View file @
27876602
...
...
@@ -24,23 +24,25 @@ enum struct ConvDataType
BF16_F32_BF16
,
// 2
};
#define OP_NAME "grouped_conv_bwd_weight"
#define OP_DESC "Grouped Convolution Backward Weight"
static
void
print_helper_msg
()
{
std
::
cout
<<
"arg1: tensor operation (grouped_conv_bwd_weight: Grouped Convolution Backward Weight
\n
"
<<
"arg2: data type (0: Input fp32, Weight fp32, Output fp32
\n
"
<<
" 1: Input fp16, Weight fp16, Output fp16
\n
"
<<
" 2: Input bf16, Weight fp32, Output bf16)
\n
"
<<
"arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
"N, K, Ho, Wo]
\n
"
<<
" 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
"N, Ho, Wo, K]
\n
"
<<
"arg4: verification (0: no, 1: yes)
\n
"
<<
"arg5: initialization (0: no init, 1: integer value, 2: decimal value)
\n
"
<<
"arg6: print tensor value (0: no; 1: yes)
\n
"
<<
"arg7: time kernel (0: no, 1: yes)
\n
"
<<
ck
::
utils
::
conv
::
get_conv_param_parser_helper_msg
()
<<
" SplitK
\n
"
<<
std
::
endl
;
std
::
cout
<<
"arg1: tensor operation ("
OP_NAME
": "
OP_DESC
")
\n
"
<<
"arg2: data type (0: Input fp32, Weight fp32, Output fp32
\n
"
<<
" 1: Input fp16, Weight fp16, Output fp16
\n
"
<<
" 2: Input bf16, Weight fp32, Output bf16)
\n
"
<<
"arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
"N, K, Ho, Wo]
\n
"
<<
" 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
"N, Ho, Wo, K]
\n
"
<<
"arg4: verification (0: no, 1: yes)
\n
"
<<
"arg5: initialization (0: no init, 1: integer value, 2: decimal value)
\n
"
<<
"arg6: print tensor value (0: no; 1: yes)
\n
"
<<
"arg7: time kernel (0: no, 1: yes)
\n
"
<<
ck
::
utils
::
conv
::
get_conv_param_parser_helper_msg
()
<<
" SplitK
\n
"
<<
std
::
endl
;
}
}
// namespace
...
...
@@ -177,6 +179,4 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
return
1
;
}
REGISTER_PROFILER_OPERATION
(
"grouped_conv_bwd_weight"
,
"Grouped Convolution Backward Weight"
,
profile_grouped_conv_bwd_weight
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_grouped_conv_bwd_weight
);
profiler/src/profile_grouped_conv_fwd.cpp
View file @
27876602
...
...
@@ -25,11 +25,14 @@ enum struct ConvDataType
INT8_INT8_INT8
,
// 3
};
#define OP_NAME "grouped_conv_fwd"
#define OP_DESC "Grouped Convolution Forward"
static
void
print_helper_msg
()
{
std
::
cout
// clang-format off
<<
"arg1: tensor operation (
grouped_conv_fwd: Grouped Convolution Forward
)
\n
"
<<
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
<<
"arg2: data type (0: Input fp32, Weight fp32, Output fp32
\n
"
<<
" 1: Input fp16, Weight fp16, Output fp16
\n
"
<<
" 2: Input bf16, Weight bf16, Output bf16
\n
"
...
...
@@ -254,6 +257,4 @@ int profile_grouped_conv_fwd(int argc, char* argv[])
return
1
;
}
REGISTER_PROFILER_OPERATION
(
"grouped_conv_fwd"
,
"Grouped Convolution Forward"
,
profile_grouped_conv_fwd
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_grouped_conv_fwd
);
profiler/src/profile_grouped_gemm.cpp
View file @
27876602
...
...
@@ -45,11 +45,14 @@ std::vector<int> argToIntArray(char* input)
return
out
;
}
#define OP_NAME "grouped_gemm"
#define OP_DESC "Grouped GEMM"
int
profile_grouped_gemm
(
int
argc
,
char
*
argv
[])
{
if
(
!
(
argc
==
14
))
{
printf
(
"arg1: tensor operation (
grouped_gemm: Grouped GEMM
)
\n
"
);
printf
(
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
...
...
@@ -163,4 +166,4 @@ int profile_grouped_gemm(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"grouped_gemm"
,
"Grouped GEMM"
,
profile_grouped_gemm
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_grouped_gemm
);
profiler/src/profile_groupnorm.cpp
View file @
27876602
...
...
@@ -44,9 +44,12 @@ struct GroupnormArgParser
}
};
#define OP_NAME "groupnorm"
#define OP_DESC "Group Normalization"
void
print_help_groupnorm
()
{
std
::
cout
<<
"arg1: tensor operation (
groupnorm: Group Normalization
)
\n
"
std
::
cout
<<
"arg1: tensor operation (
"
OP_NAME
": "
OP_DESC
"
)
\n
"
<<
"arg2: data type (0: fp16; 1: fp32)
\n
"
<<
"arg3: verification (0: no; 1: yes)
\n
"
<<
"arg4: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
...
...
@@ -106,4 +109,4 @@ int profile_groupnorm(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"groupnorm"
,
"Group Normalization"
,
profile_groupnorm
);
REGISTER_PROFILER_OPERATION
(
OP_NAME
,
OP_DESC
,
profile_groupnorm
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment