Unverified commit cec69bc3, authored by JD, committed by GitHub
Browse files

Add host API (#220)



* Add host API

* manually rebase on develop

* clean

* manually rebase on develop

* exclude tests from all target

* address review comments

* update client app name

* fix missing lib name

* clang-format update

* refactor

* refactor

* refactor

* refactor

* refactor

* fix test issue

* refactor

* refactor

* refactor

* update cmake and readme
Co-authored-by: Chao Liu <chao.liu2@amd.com>
parent 0f912e20
...@@ -48,7 +48,7 @@ template <typename ADataType, ...@@ -48,7 +48,7 @@ template <typename ADataType,
void profile_gemm_bias_relu_add_impl(int do_verification, void profile_gemm_bias_relu_add_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
int M, int M,
int N, int N,
int K, int K,
...@@ -232,7 +232,8 @@ void profile_gemm_bias_relu_add_impl(int do_verification, ...@@ -232,7 +232,8 @@ void profile_gemm_bias_relu_add_impl(int do_verification,
{ {
std::string gemm_name = gemm_ptr->GetTypeString(); std::string gemm_name = gemm_ptr->GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat); float ave_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t flop = std::size_t(2) * M * N * K; std::size_t flop = std::size_t(2) * M * N * K;
......
...@@ -48,7 +48,7 @@ template <typename ADataType, ...@@ -48,7 +48,7 @@ template <typename ADataType,
void profile_gemm_bias_relu_impl(int do_verification, void profile_gemm_bias_relu_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
int M, int M,
int N, int N,
int K, int K,
...@@ -212,7 +212,8 @@ void profile_gemm_bias_relu_impl(int do_verification, ...@@ -212,7 +212,8 @@ void profile_gemm_bias_relu_impl(int do_verification,
{ {
std::string gemm_name = gemm_ptr->GetTypeString(); std::string gemm_name = gemm_ptr->GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat); float ave_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t flop = std::size_t(2) * M * N * K; std::size_t flop = std::size_t(2) * M * N * K;
......
...@@ -91,7 +91,7 @@ template <typename ADataType, ...@@ -91,7 +91,7 @@ template <typename ADataType,
void profile_gemm_impl(int do_verification, void profile_gemm_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
int M, int M,
int N, int N,
int K, int K,
...@@ -416,7 +416,8 @@ void profile_gemm_impl(int do_verification, ...@@ -416,7 +416,8 @@ void profile_gemm_impl(int do_verification,
std::string gemm_name = gemm_ptr->GetTypeString(); std::string gemm_name = gemm_ptr->GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat); float ave_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t flop = std::size_t(2) * M * N * K; std::size_t flop = std::size_t(2) * M * N * K;
......
...@@ -52,7 +52,7 @@ template <typename ADataType, ...@@ -52,7 +52,7 @@ template <typename ADataType,
bool profile_gemm_reduce_impl(int do_verification, bool profile_gemm_reduce_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
int M, int M,
int N, int N,
int K, int K,
...@@ -243,36 +243,18 @@ bool profile_gemm_reduce_impl(int do_verification, ...@@ -243,36 +243,18 @@ bool profile_gemm_reduce_impl(int do_verification,
if(gemm_ptr->IsSupportedArgument(argument_ptr.get())) if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
{ {
// warm up // init DO, D1 to 0
invoker_ptr->Run(argument_ptr.get()); d0_device_buf.SetZero();
d1_device_buf.SetZero();
// timing float ave_time =
float total_time = 0; invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
for(int i = 0; i < nrepeat; ++i)
{
// init DO, D1 to 0
d0_device_buf.SetZero();
d1_device_buf.SetZero();
KernelTimer timer;
timer.Start();
invoker_ptr->Run(argument_ptr.get());
timer.End();
total_time += timer.GetElapsedTime();
}
float ave_time = total_time / nrepeat;
std::string gemm_name = gemm_ptr->GetTypeString(); std::string gemm_name = gemm_ptr->GetTypeString();
std::size_t flop = std::size_t(2) * M * N * K; std::size_t flop = std::size_t(2) * M * N * K;
std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * M + std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * N +
sizeof(CDataType) * M * N + sizeof(CDataType) * N; sizeof(CDataType) * M * N + sizeof(CDataType) * N;
float tflops = static_cast<float>(flop) / 1.E9 / ave_time; float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
......
...@@ -49,7 +49,7 @@ template <typename ADataType, ...@@ -49,7 +49,7 @@ template <typename ADataType,
void profile_grouped_gemm_impl(int do_verification, void profile_grouped_gemm_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
const std::vector<int>& Ms, const std::vector<int>& Ms,
const std::vector<int>& Ns, const std::vector<int>& Ns,
const std::vector<int>& Ks, const std::vector<int>& Ks,
...@@ -231,7 +231,8 @@ void profile_grouped_gemm_impl(int do_verification, ...@@ -231,7 +231,8 @@ void profile_grouped_gemm_impl(int do_verification,
{ {
std::string gemm_name = gemm_ptr->GetTypeString(); std::string gemm_name = gemm_ptr->GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat); float ave_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t flop = 0, num_btype = 0; std::size_t flop = 0, num_btype = 0;
for(std::size_t i = 0; i < gemm_shapes.size(); i++) for(std::size_t i = 0; i < gemm_shapes.size(); i++)
......
...@@ -157,7 +157,7 @@ void profile_reduce_impl_impl(bool do_verification, ...@@ -157,7 +157,7 @@ void profile_reduce_impl_impl(bool do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
bool do_dumpout, bool do_dumpout,
int nrepeat, bool time_kernel,
const std::vector<size_t>& inLengths, const std::vector<size_t>& inLengths,
const std::vector<int>& reduceDims, const std::vector<int>& reduceDims,
float alpha, float alpha,
...@@ -430,7 +430,8 @@ void profile_reduce_impl_impl(bool do_verification, ...@@ -430,7 +430,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto invoker_ptr = reduce_ptr->MakeInvokerPointer(); auto invoker_ptr = reduce_ptr->MakeInvokerPointer();
float avg_time = invoker_ptr->Run(argument_ptr.get(), nrepeat); float avg_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_bytes = std::size_t num_bytes =
invariant_total_length * reduce_total_length * sizeof(InDataType) + invariant_total_length * reduce_total_length * sizeof(InDataType) +
...@@ -516,7 +517,8 @@ void profile_reduce_impl_impl(bool do_verification, ...@@ -516,7 +517,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto invoker_ptr = reduce_ptr->MakeInvokerPointer(); auto invoker_ptr = reduce_ptr->MakeInvokerPointer();
float avg_time = invoker_ptr->Run(argument_ptr.get(), nrepeat); float avg_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_bytes = std::size_t num_bytes =
invariant_total_length * reduce_total_length * sizeof(InDataType) + invariant_total_length * reduce_total_length * sizeof(InDataType) +
...@@ -554,7 +556,8 @@ void profile_reduce_impl_impl(bool do_verification, ...@@ -554,7 +556,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto invoker2_ptr = reduce2_ptr->MakeInvokerPointer(); auto invoker2_ptr = reduce2_ptr->MakeInvokerPointer();
float avg_time_2 = invoker2_ptr->Run(argument2_ptr.get(), nrepeat); float avg_time_2 =
invoker2_ptr->Run(argument2_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_bytes_2 = std::size_t num_bytes_2 =
static_cast<size_t>(inLengths2[0]) * inLengths2[1] * sizeof(AccDataType); static_cast<size_t>(inLengths2[0]) * inLengths2[1] * sizeof(AccDataType);
...@@ -625,7 +628,7 @@ void profile_reduce_impl(bool do_verification, ...@@ -625,7 +628,7 @@ void profile_reduce_impl(bool do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
bool do_dumpout, bool do_dumpout,
int nrepeat, bool time_kernel,
const std::vector<size_t>& inLengths, const std::vector<size_t>& inLengths,
const std::vector<int>& reduceDims, const std::vector<int>& reduceDims,
ReduceTensorOp ReduceOpId, ReduceTensorOp ReduceOpId,
...@@ -663,7 +666,7 @@ void profile_reduce_impl(bool do_verification, ...@@ -663,7 +666,7 @@ void profile_reduce_impl(bool do_verification,
init_method, init_method,
do_log, do_log,
do_dumpout, do_dumpout,
nrepeat, time_kernel,
inLengths, inLengths,
reduceDims, reduceDims,
alpha, alpha,
......
...@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[])
printf(" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])\n"); printf(" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n"); printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
exit(1); exit(1);
} }
...@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[]) ...@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
...@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) ...@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n"); printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
printf("arg15: split k into mulitiple batch\n"); printf("arg15: split k into mulitiple batch\n");
exit(1); exit(1);
...@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) ...@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) ...@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) ...@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) ...@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) ...@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
...@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[]) ...@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n"); printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n"); printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n"); "RightPx\n");
exit(1); exit(1);
...@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[]) ...@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]); const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]); const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]); const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]); const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]); const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]); const ck::index_t K = std::stoi(argv[11]);
...@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[]) ...@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, StreamControl{nullptr, time_kernel},
N, N,
K, K,
C, C,
...@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[]) ...@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, StreamControl{nullptr, time_kernel},
N, N,
K, K,
C, C,
...@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[]) ...@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, StreamControl{nullptr, time_kernel},
N, N,
K, K,
C, C,
...@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[]) ...@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, StreamControl{nullptr, time_kernel},
N, N,
K, K,
C, C,
......
...@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[]) ...@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]); const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]); const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]); const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]); const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]); const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]); const ck::index_t K = std::stoi(argv[11]);
...@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[]) ...@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
N, N,
K, K,
C, C,
...@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[]) ...@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
N, N,
K, K,
C, C,
......
...@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[]) ...@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n"); printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n"); printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n"); "RightPx\n");
exit(1); exit(1);
...@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[]) ...@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]); const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]); const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]); const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]); const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]); const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]); const ck::index_t K = std::stoi(argv[11]);
...@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[]) ...@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
N, N,
K, K,
C, C,
......
...@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[]) ...@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n"); printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n"); printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n"); "RightPx\n");
exit(1); exit(1);
...@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[]) ...@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]); const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]); const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]); const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]); const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]); const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]); const ck::index_t K = std::stoi(argv[11]);
...@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[]) ...@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
N, N,
K, K,
C, C,
......
...@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[]) ...@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n"); printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n"); printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n"); "RightPx\n");
exit(1); exit(1);
...@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[]) ...@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]); const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]); const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]); const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]); const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]); const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]); const ck::index_t K = std::stoi(argv[11]);
...@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[]) ...@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
N, N,
K, K,
C, C,
......
...@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial) ...@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
printf("arg6: verification (0: no; 1: yes)\n"); printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n"); printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n"); "RightPx\n");
return 1; return 1;
...@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial) ...@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
const bool do_verification = std::stoi(argv[6]); const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]); const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]); const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]); const bool time_kernel = std::stoi(argv[9]);
ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams); ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
...@@ -132,7 +132,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial) ...@@ -132,7 +132,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
params.N_, params.N_,
params.K_, params.K_,
params.C_, params.C_,
...@@ -157,7 +157,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial) ...@@ -157,7 +157,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
params.N_, params.N_,
params.K_, params.K_,
params.C_, params.C_,
...@@ -182,7 +182,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial) ...@@ -182,7 +182,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
params.N_, params.N_,
params.K_, params.K_,
params.C_, params.C_,
......
...@@ -119,7 +119,7 @@ template <int NDim, ...@@ -119,7 +119,7 @@ template <int NDim,
void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params, void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
bool do_verification, bool do_verification,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
int init_method, int init_method,
ConvLayouts) ConvLayouts)
{ {
...@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params, ...@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
reference_conv_fwd_fun); reference_conv_fwd_fun);
auto best_conf = run_engine.Profile( auto best_conf = run_engine.Profile(
conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(), conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(),
nrepeat, time_kernel,
do_verification, do_verification,
do_log); do_log);
...@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type,
const ck::utils::conv::ConvParams& params, const ck::utils::conv::ConvParams& params,
bool do_verification, bool do_verification,
bool do_log, bool do_log,
int nrepeat, bool time_kernel,
int init_method) int init_method)
{ {
switch(data_layout) switch(data_layout)
...@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{}); ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break; break;
...@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{}); ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break; break;
...@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{}); ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break; break;
...@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{}); ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break; break;
...@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{}); ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break; break;
...@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{}); ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break; break;
...@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{}); ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break; break;
...@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type, ...@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type,
params, params,
do_verification, do_verification,
do_log, do_log,
nrepeat, time_kernel,
init_method, init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{}); ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break; break;
...@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[]) ...@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
bool do_verification{true}; bool do_verification{true};
int init_method{2}; int init_method{2};
bool do_log{false}; bool do_log{false};
int nrepeat{100}; bool time_kernel{false};
int num_dim_spatial{2}; int num_dim_spatial{2};
ConvParams params; ConvParams params;
...@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[]) ...@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
do_verification = std::stoi(argv[4]); do_verification = std::stoi(argv[4]);
init_method = std::stoi(argv[5]); init_method = std::stoi(argv[5]);
do_log = std::stoi(argv[6]); do_log = std::stoi(argv[6]);
nrepeat = std::stoi(argv[7]); time_kernel = std::stoi(argv[7]);
num_dim_spatial = std::stoi(argv[8]); num_dim_spatial = std::stoi(argv[8]);
} }
if(argc >= 10) if(argc >= 10)
...@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[]) ...@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
{ {
case 1: case 1:
profile_convnd_instances<1>( profile_convnd_instances<1>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method); data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
break; break;
case 2: case 2:
profile_convnd_instances<2>( profile_convnd_instances<2>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method); data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
break; break;
case 3: case 3:
profile_convnd_instances<3>( profile_convnd_instances<3>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method); data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
break; break;
default: default:
throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " + throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " +
......
...@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[]) ...@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n"); printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: split k into mulitiple batch\n"); printf("arg14: split k into mulitiple batch\n");
exit(1); exit(1);
...@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[]) ...@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
...@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n"); printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: alpha\n"); printf("arg14: alpha\n");
printf("arg15: beta\n"); printf("arg15: beta\n");
...@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[]) ...@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
...@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[]) ...@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n"); printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: split k into mulitiple batch\n"); printf("arg14: split k into mulitiple batch\n");
exit(1); exit(1);
...@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[]) ...@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[]) ...@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[]) ...@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[]) ...@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[]) ...@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
...@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[]) ...@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1\n"); printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1\n");
printf("arg15: split k into mulitiple batch\n"); printf("arg15: split k into mulitiple batch\n");
exit(1); exit(1);
...@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[]) ...@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[]) ...@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[]) ...@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[]) ...@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[]) ...@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
...@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[]) ...@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n"); printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n"); printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n"); printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: split k into mulitiple batch\n"); printf("arg14: split k into mulitiple batch\n");
exit(1); exit(1);
...@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[]) ...@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]); const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]); const int N = std::stoi(argv[9]);
...@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[]) ...@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[]) ...@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[]) ...@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
...@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[]) ...@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification, do_verification,
init_method, init_method,
do_log, do_log,
nrepeat, time_kernel,
M, M,
N, N,
K, K,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment