"vscode:/vscode.git/clone" did not exist on "c8b0f0eb21354df3b920e75fc1517d4d06d757b4"
Commit 27876602 authored by Po-Yen, Chen

Use macro to eliminate redundant code

parent f744c531
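
The change below factors each profiler's operation name and description into OP_NAME / OP_DESC macros, so each string is written once per file and feeds both the help text and the registration call. The pattern relies on two standard C/C++ facts: the preprocessor substitutes the macros textually, and the compiler then merges adjacent string literals into a single literal. A minimal, self-contained sketch of the same pattern (illustrative only, not code from this repository):

    #include <cstdio>

    #define OP_NAME "batched_gemm"
    #define OP_DESC "Batched GEMM"

    int main()
    {
        // After preprocessing, the line below reads
        //   printf("arg1: tensor operation (" "batched_gemm" ": " "Batched GEMM" ")\n");
        // and the compiler concatenates the adjacent literals into one string:
        //   "arg1: tensor operation (batched_gemm: Batched GEMM)\n"
        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
        return 0;
    }
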
@@ -26,12 +26,15 @@ enum struct GemmDataType
     INT8_INT8_INT8, // 3
 };
 
+#define OP_NAME "batched_gemm"
+#define OP_DESC "Batched GEMM"
+
 int profile_batched_gemm(int argc, char* argv[])
 {
     if(argc != 18)
     {
         // clang-format off
-        printf("arg1: tensor operation (batched_gemm: Batched GEMM)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n");
         printf("arg3: matrix layout (0: A[g, m, k] * B[g, k, n] = C[g, m, n];\n");
         printf("                     1: A[g, m, k] * B[g, n, k] = C[g, m, n];\n");
@@ -197,4 +200,4 @@ int profile_batched_gemm(int argc, char* argv[])
     }
 }
 
-REGISTER_PROFILER_OPERATION("batched_gemm", "Batched GEMM", profile_batched_gemm);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm);
@@ -15,6 +15,9 @@ using F32 = float;
 using Row = ck::tensor_layout::gemm::RowMajor;
 using Col = ck::tensor_layout::gemm::ColumnMajor;
 
+#define OP_NAME "batched_gemm_add_relu_gemm_add"
+#define OP_DESC "Batched GEMM+Add+Relu+GEMM+Add"
+
 int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[])
 {
     enum struct GemmMatrixLayout
@@ -110,8 +113,7 @@ int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[])
     }
     else
     {
-        printf("arg1: tensor operation (batched_gemm_add_relu_gemm_add: "
-               "Batched GEMM+Add+Relu+GEMM+Add)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (1: fp16)\n");
         printf("arg3: matrix layout (0: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[n, o] + D1[m, o] "
                "= E1[m, o]; 1: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[o, n] + D1[m, o] = "
@@ -209,6 +211,4 @@ int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("batched_gemm_add_relu_gemm_add",
-                            "Batched GEMM+Add+Relu+GEMM+Add",
-                            profile_batched_gemm_add_relu_gemm_add);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm_add_relu_gemm_add);
@@ -15,6 +15,9 @@ using F32 = float;
 using Row = ck::tensor_layout::gemm::RowMajor;
 using Col = ck::tensor_layout::gemm::ColumnMajor;
 
+#define OP_NAME "batched_gemm_gemm"
+#define OP_DESC "Batched GEMM+GEMM"
+
 int profile_batched_gemm_gemm(int argc, char* argv[])
 {
     enum struct GemmMatrixLayout
@@ -102,7 +105,7 @@ int profile_batched_gemm_gemm(int argc, char* argv[])
     }
     else
     {
-        printf("arg1: tensor operation (batched_gemm_gemm: Batched GEMM+GEMM)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (1: fp16)\n");
         printf("arg3: matrix layout (0: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[n, o] + D1[m, o] "
                "= E1[m, o]; 1: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[o, n] + D1[m, o] = E1[m, "
@@ -181,4 +184,4 @@ int profile_batched_gemm_gemm(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("batched_gemm_gemm", "Batched GEMM+GEMM", profile_batched_gemm_gemm);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm_gemm);
@@ -9,6 +9,9 @@
 #include "profiler/profile_batched_gemm_reduce_impl.hpp"
 #include "profiler_operation_registry.hpp"
 
+#define OP_NAME "batched_gemm_reduce"
+#define OP_DESC "Batched GEMM+Reduce"
+
 int profile_batched_gemm_reduce(int argc, char* argv[])
 {
     enum struct GemmMatrixLayout
@@ -27,7 +30,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
 
     if(argc != 15)
     {
-        printf("arg1: tensor operation (batched_gemm_reduce: Batched GEMM+Reduce)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16)\n");
        printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
         printf("                     1: A[m, k] * B[n, k] = C[m, n];\n");
@@ -153,6 +156,4 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("batched_gemm_reduce",
-                            "Batched GEMM+Reduce",
-                            profile_batched_gemm_reduce);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm_reduce);
@@ -25,10 +25,13 @@ enum struct ConvDataType
     INT8_INT8_INT8, // 3
 };
 
+#define OP_NAME "conv_bwd_data"
+#define OP_DESC "Convolution Backward Data"
+
 static void print_helper_msg()
 {
     std::cout
-        << "arg1: tensor operation (conv_bwd_data: Convolution Backward Data)\n"
+        << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
         << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
         << "                 1: Input fp16, Weight fp16, Output fp16\n"
         << "                 2: Input bf16, Weight bf16, Output bf16\n"
@@ -184,4 +187,4 @@ int profile_conv_bwd_data(int argc, char* argv[])
     return 1;
 }
 
-REGISTER_PROFILER_OPERATION("conv_bwd_data", "Convolution Backward Data", profile_conv_bwd_data);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_bwd_data);
@@ -25,11 +25,14 @@ enum struct ConvDataType
     INT8_INT8_INT8, // 3
 };
 
+#define OP_NAME "conv_fwd"
+#define OP_DESC "Convolution Forward"
+
 static void print_helper_msg()
 {
     std::cout
         // clang-format-off
-        << "arg1: tensor operation (conv_fwd: Convolution Forward)\n"
+        << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
         << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
         << "                 1: Input fp16, Weight fp16, Output fp16\n"
         << "                 2: Input bf16, Weight bf16, Output bf16\n"
@@ -186,4 +189,4 @@ int profile_conv_fwd(int argc, char* argv[])
     return 1;
 }
 
-REGISTER_PROFILER_OPERATION("conv_fwd", "Convolution Forward", profile_conv_fwd);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_fwd);
@@ -33,11 +33,14 @@ enum struct ConvOutputLayout
     NHWK, // 1
 };
 
+#define OP_NAME "conv_fwd_bias_relu"
+#define OP_DESC "Convolution Forward+Bias+ReLU"
+
 int profile_conv_fwd_bias_relu(int argc, char* argv[])
 {
     if(argc != 25)
     {
-        printf("arg1: tensor operation (conv_fwd_bias_relu: Convolution Forward+Bias+ReLU)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16)\n");
         printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
         printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
@@ -116,6 +119,4 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("conv_fwd_bias_relu",
-                            "Convolution Forward+Bias+ReLU",
-                            profile_conv_fwd_bias_relu);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_fwd_bias_relu);
@@ -33,12 +33,14 @@ enum struct ConvOutputLayout
     NHWK, // 1
 };
 
+#define OP_NAME "conv_fwd_bias_relu_add"
+#define OP_DESC "Convolution Forward+Bias+ReLU+Add"
+
 int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
 {
     if(argc != 25)
     {
-        printf(
-            "arg1: tensor operation (conv_fwd_bias_relu_add: Convolution Forward+Bias+ReLU+Add)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16)\n");
         printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
         printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
@@ -117,6 +119,4 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("conv_fwd_bias_relu_add",
-                            "Convolution Forward+Bias+ReLU+Add",
-                            profile_conv_fwd_bias_relu_add);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_fwd_bias_relu_add);
@@ -25,9 +25,12 @@ enum struct GemmDataType
     INT8_INT8_INT8, // 3
 };
 
+#define OP_NAME "gemm"
+#define OP_DESC "GEMM"
+
 static void print_helper_msg()
 {
-    std::cout << "arg1: tensor operation (gemm: GEMM)\n"
+    std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
               << "arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n"
               << "arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"
               << "                     1: A[m, k] * B[n, k] = C[m, n];\n"
@@ -186,4 +189,4 @@ int profile_gemm(int argc, char* argv[])
     }
 }
 
-REGISTER_PROFILER_OPERATION("gemm", "GEMM", profile_gemm);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm);
@@ -9,6 +9,9 @@
 #include "profiler/profile_gemm_add_add_fastgelu_impl.hpp"
 #include "profiler_operation_registry.hpp"
 
+#define OP_NAME "gemm_add_add_fastgelu"
+#define OP_DESC "GEMM+Add+Add+FastGeLU"
+
 int profile_gemm_add_add_fastgelu(int argc, char* argv[])
 {
     enum struct MatrixLayout
@@ -30,7 +33,7 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[])
     if(argc != 16)
     {
         // clang-format off
-        printf("arg1: tensor operation (gemm_add_add_fastgelu: GEMM+Add+Add+FastGeLU)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
         printf("arg3: matrix layout (0: E[m, n] = FastGeLU(A[m, k] * B[k, n] + D0[m, n] + D1[m, n]);\n");
         printf("                     1: E[m, n] = FastGeLU(A[m, k] * B[n, k] + D0[m, n] + D1[m, n]);\n");
@@ -152,6 +155,4 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[])
     }
 }
 
-REGISTER_PROFILER_OPERATION("gemm_add_add_fastgelu",
-                            "GEMM+Add+Add+FastGeLU",
-                            profile_gemm_add_add_fastgelu);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_add_add_fastgelu);
@@ -9,6 +9,9 @@
 #include "profiler/profile_gemm_bias_add_reduce_impl.hpp"
 #include "profiler_operation_registry.hpp"
 
+#define OP_NAME "gemm_bias_add_reduce"
+#define OP_DESC "GEMM+Bias+Add+Reduce"
+
 int profile_gemm_bias_add_reduce(int argc, char* argv[])
 {
     enum struct GemmMatrixLayout
@@ -27,7 +30,7 @@ int profile_gemm_bias_add_reduce(int argc, char* argv[])
 
     if(!(argc == 14 || argc == 15))
     {
-        printf("arg1: tensor operation (gemm_bias_add_reduce: GEMM+Bias+Add+Reduce)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16)\n");
         printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
         printf("                     1: A[m, k] * B[n, k] = C[m, n];\n");
@@ -161,6 +164,4 @@ int profile_gemm_bias_add_reduce(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("gemm_bias_add_reduce",
-                            "GEMM+Bias+Add+Reduce",
-                            profile_gemm_bias_add_reduce);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_bias_add_reduce);
@@ -9,6 +9,9 @@
 #include "profiler/profile_gemm_bilinear_impl.hpp"
 #include "profiler_operation_registry.hpp"
 
+#define OP_NAME "gemm_bilinear"
+#define OP_DESC "GEMM+Bilinear"
+
 int profile_gemm_bilinear(int argc, char* argv[])
 {
     enum struct MatrixLayout
@@ -30,7 +33,7 @@ int profile_gemm_bilinear(int argc, char* argv[])
     if(argc != 17)
     {
         // clang-format off
-        printf("arg1: tensor operation (gemm_bilinear: GEMM+Bilinear)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
         printf("arg3: matrix layout (0: E[m, n] = alpha * A[m, k] * B[k, n] + beta * D[m, n];\n");
         printf("                     1: E[m, n] = alpha * A[m, k] * B[n, k] + beta * D[m, n];\n");
@@ -146,4 +149,4 @@ int profile_gemm_bilinear(int argc, char* argv[])
     }
 }
 
-REGISTER_PROFILER_OPERATION("gemm_bilinear", "GEMM+Bilinear", profile_gemm_bilinear);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_bilinear);
@@ -9,6 +9,9 @@
 #include "profiler/profile_gemm_reduce_impl.hpp"
 #include "profiler_operation_registry.hpp"
 
+#define OP_NAME "gemm_reduce"
+#define OP_DESC "GEMM+Reduce"
+
 int profile_gemm_reduce(int argc, char* argv[])
 {
     enum struct GemmMatrixLayout
@@ -27,7 +30,7 @@ int profile_gemm_reduce(int argc, char* argv[])
 
     if(!(argc == 14 || argc == 15))
     {
-        printf("arg1: tensor operation (gemm_reduce: GEMM+Reduce)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16)\n");
         printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
         printf("                     1: A[m, k] * B[n, k] = C[m, n];\n");
@@ -148,4 +151,4 @@ int profile_gemm_reduce(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("gemm_reduce", "GEMM+Reduce", profile_gemm_reduce);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_reduce);
@@ -25,11 +25,14 @@ enum struct GemmDataType
     INT8_INT8_INT8, // 3
 };
 
+#define OP_NAME "gemm_splitk"
+#define OP_DESC "Split-K GEMM"
+
 int profile_gemm_splitk(int argc, char* argv[])
 {
     if(argc != 15)
     {
-        printf("arg1: tensor operation (gemm_splitk: Split-K GEMM)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
         printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
         printf("                     1: A[m, k] * B[n, k] = C[m, n];\n");
@@ -148,4 +151,4 @@ int profile_gemm_splitk(int argc, char* argv[])
     }
 }
 
-REGISTER_PROFILER_OPERATION("gemm_splitk", "Split-K GEMM", profile_gemm_splitk);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_splitk);
@@ -24,23 +24,25 @@ enum struct ConvDataType
     BF16_F32_BF16, // 2
 };
 
+#define OP_NAME "grouped_conv_bwd_weight"
+#define OP_DESC "Grouped Convolution Backward Weight"
+
 static void print_helper_msg()
 {
-    std::cout
-        << "arg1: tensor operation (grouped_conv_bwd_weight: Grouped Convolution Backward Weight\n"
-        << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
-        << "                 1: Input fp16, Weight fp16, Output fp16\n"
-        << "                 2: Input bf16, Weight fp32, Output bf16)\n"
-        << "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
-           "N, K, Ho, Wo]\n"
-        << "                     1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
-           "N, Ho, Wo, K]\n"
-        << "arg4: verification (0: no, 1: yes)\n"
-        << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
-        << "arg6: print tensor value (0: no; 1: yes)\n"
-        << "arg7: time kernel (0: no, 1: yes)\n"
-        << ck::utils::conv::get_conv_param_parser_helper_msg() << " SplitK\n"
-        << std::endl;
+    std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
+              << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
+              << "                 1: Input fp16, Weight fp16, Output fp16\n"
+              << "                 2: Input bf16, Weight fp32, Output bf16)\n"
+              << "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
+                 "N, K, Ho, Wo]\n"
+              << "                     1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
+                 "N, Ho, Wo, K]\n"
+              << "arg4: verification (0: no, 1: yes)\n"
+              << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
+              << "arg6: print tensor value (0: no; 1: yes)\n"
+              << "arg7: time kernel (0: no, 1: yes)\n"
+              << ck::utils::conv::get_conv_param_parser_helper_msg() << " SplitK\n"
+              << std::endl;
 }
 
 } // namespace
@@ -177,6 +179,4 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
     return 1;
 }
 
-REGISTER_PROFILER_OPERATION("grouped_conv_bwd_weight",
-                            "Grouped Convolution Backward Weight",
-                            profile_grouped_conv_bwd_weight);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_conv_bwd_weight);
@@ -25,11 +25,14 @@ enum struct ConvDataType
     INT8_INT8_INT8, // 3
 };
 
+#define OP_NAME "grouped_conv_fwd"
+#define OP_DESC "Grouped Convolution Forward"
+
 static void print_helper_msg()
 {
     std::cout
         // clang-format off
-        << "arg1: tensor operation (grouped_conv_fwd: Grouped Convolution Forward)\n"
+        << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
         << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
         << "                 1: Input fp16, Weight fp16, Output fp16\n"
         << "                 2: Input bf16, Weight bf16, Output bf16\n"
@@ -254,6 +257,4 @@ int profile_grouped_conv_fwd(int argc, char* argv[])
     return 1;
 }
 
-REGISTER_PROFILER_OPERATION("grouped_conv_fwd",
-                            "Grouped Convolution Forward",
-                            profile_grouped_conv_fwd);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_conv_fwd);
@@ -45,11 +45,14 @@ std::vector<int> argToIntArray(char* input)
     return out;
 }
 
+#define OP_NAME "grouped_gemm"
+#define OP_DESC "Grouped GEMM"
+
 int profile_grouped_gemm(int argc, char* argv[])
 {
     if(!(argc == 14))
     {
-        printf("arg1: tensor operation (grouped_gemm: Grouped GEMM)\n");
+        printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
         printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
         printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
         printf("                     1: A[m, k] * B[n, k] = C[m, n];\n");
@@ -163,4 +166,4 @@ int profile_grouped_gemm(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("grouped_gemm", "Grouped GEMM", profile_grouped_gemm);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_gemm);
@@ -44,9 +44,12 @@ struct GroupnormArgParser
     }
 };
 
+#define OP_NAME "groupnorm"
+#define OP_DESC "Group Normalization"
+
 void print_help_groupnorm()
 {
-    std::cout << "arg1: tensor operation (groupnorm: Group Normalization)\n"
+    std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
              << "arg2: data type (0: fp16; 1: fp32)\n"
              << "arg3: verification (0: no; 1: yes)\n"
              << "arg4: initialization (0: no init; 1: integer value; 2: decimal value)\n"
@@ -106,4 +109,4 @@ int profile_groupnorm(int argc, char* argv[])
     return 0;
 }
 
-REGISTER_PROFILER_OPERATION("groupnorm", "Group Normalization", profile_groupnorm);
+REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_groupnorm);