Commit b134b7d6 authored by carlushuang's avatar carlushuang
Browse files

Merge remote-tracking branch 'origin/develop' into cpu_avx2

parents 090ba885 9f71ff48
......@@ -7,7 +7,7 @@
#include "tensor_layout.hpp"
#include "device_tensor.hpp"
#include "element_wise_operation.hpp"
#include "element_wise_reduce_operation.hpp"
#include "reduction_operator.hpp"
#include "device_gemm_reduce.hpp"
#include "reference_gemm.hpp"
......@@ -20,8 +20,7 @@ using DeviceGemmReduceNoOpPtr = ck::tensor_operation::device::DeviceGemmReducePt
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::ReduceSum,
ck::tensor_operation::element_wise::ReduceSquareSum>;
ck::tensor_operation::element_wise::UnarySquare<float, float, false>>;
void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instances(
std::vector<DeviceGemmReduceNoOpPtr>&);
......@@ -53,7 +52,7 @@ template <typename ADataType,
bool profile_gemm_reduce_impl(int do_verification,
int init_method,
bool do_log,
int nrepeat,
bool time_kernel,
int M,
int N,
int K,
......@@ -113,17 +112,19 @@ bool profile_gemm_reduce_impl(int do_verification,
b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5}, num_thread);
}
using AElementOp = ck::tensor_operation::element_wise::PassThrough;
using BElementOp = ck::tensor_operation::element_wise::PassThrough;
using CElementOp = ck::tensor_operation::element_wise::PassThrough;
using D0ReduceOp = ck::tensor_operation::element_wise::ReduceSum;
using D1ReduceOp = ck::tensor_operation::element_wise::ReduceSquareSum;
using AElementOp = ck::tensor_operation::element_wise::PassThrough;
using BElementOp = ck::tensor_operation::element_wise::PassThrough;
using CElementOp = ck::tensor_operation::element_wise::PassThrough;
using D0ReduceOp = ck::reduce::Add<float>;
using D1ReduceOp = ck::reduce::Add<float>;
using D1ElementOp = ck::tensor_operation::element_wise::UnarySquare<float, float, false>;
const auto a_element_op = AElementOp{};
const auto b_element_op = BElementOp{};
const auto c_element_op = CElementOp{};
const auto d0_reduce_op = D0ReduceOp{};
const auto d1_reduce_op = D1ReduceOp{};
const auto a_element_op = AElementOp{};
const auto b_element_op = BElementOp{};
const auto c_element_op = CElementOp{};
const auto d0_reduce_op = D0ReduceOp{};
const auto d1_reduce_op = D1ReduceOp{};
const auto d1_element_op = D1ElementOp{};
if(do_verification)
{
......@@ -140,17 +141,21 @@ bool profile_gemm_reduce_impl(int do_verification,
for(int m = 0; m < M; ++m)
{
float d0_acc = d0_reduce_op.GetReduceZeroValue();
float d1_acc = d1_reduce_op.GetReduceZeroValue();
float d0_acc = d0_reduce_op.GetReductionZeroVal();
float d1_acc = d1_reduce_op.GetReductionZeroVal();
for(int n = 0; n < N; ++n)
{
d0_reduce_op.Reduce(d0_acc, c_m_n_host_result(m, n));
d1_reduce_op.Reduce(d1_acc, c_m_n_host_result(m, n));
float d0_val = ck::type_convert<float>(c_m_n_host_result(m, n));
float d1_val;
d1_element_op(d1_val, d0_val);
d0_reduce_op(d0_acc, d0_val);
d1_reduce_op(d1_acc, d1_val);
}
d0_m_host_result(m) = d0_acc;
d1_m_host_result(m) = d1_acc;
d0_m_host_result(m) = ck::type_convert<DDataType>(d0_acc);
d1_m_host_result(m) = ck::type_convert<DDataType>(d1_acc);
}
}
......@@ -232,43 +237,24 @@ bool profile_gemm_reduce_impl(int do_verification,
a_element_op,
b_element_op,
c_element_op,
d0_reduce_op,
d1_reduce_op);
d1_element_op);
auto invoker_ptr = gemm_ptr->MakeInvokerPointer();
if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
{
// warm up
invoker_ptr->Run(argument_ptr.get());
// timing
float total_time = 0;
for(int i = 0; i < nrepeat; ++i)
{
// init D0, D1 to 0
d0_device_buf.SetZero();
d1_device_buf.SetZero();
KernelTimer timer;
timer.Start();
invoker_ptr->Run(argument_ptr.get());
timer.End();
total_time += timer.GetElapsedTime();
}
// init D0, D1 to 0
d0_device_buf.SetZero();
d1_device_buf.SetZero();
float ave_time = total_time / nrepeat;
float ave_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::string gemm_name = gemm_ptr->GetTypeString();
std::size_t flop = std::size_t(2) * M * N * K;
std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * M +
std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * N +
sizeof(CDataType) * M * N + sizeof(CDataType) * N;
float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
......
......@@ -49,13 +49,13 @@ template <typename ADataType,
void profile_grouped_gemm_impl(int do_verification,
int init_method,
bool do_log,
int nrepeat,
std::vector<int> Ms,
std::vector<int> Ns,
std::vector<int> Ks,
std::vector<int> StrideAs,
std::vector<int> StrideBs,
std::vector<int> StrideCs)
bool time_kernel,
const std::vector<int>& Ms,
const std::vector<int>& Ns,
const std::vector<int>& Ks,
const std::vector<int>& StrideAs,
const std::vector<int>& StrideBs,
const std::vector<int>& StrideCs)
{
auto f_host_tensor_descriptor =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
......@@ -71,7 +71,7 @@ void profile_grouped_gemm_impl(int do_verification,
}
};
int group_count = Ms.size();
std::size_t group_count = Ms.size();
if(!(group_count == Ns.size() && group_count == Ks.size() && group_count == StrideAs.size() &&
group_count == StrideBs.size() && group_count == StrideCs.size()))
......@@ -83,7 +83,7 @@ void profile_grouped_gemm_impl(int do_verification,
std::vector<Tensor<BDataType>> b_k_n;
std::vector<Tensor<CDataType>> c_m_n_device_results;
for(int i = 0; i < Ms.size(); i++)
for(std::size_t i = 0; i < group_count; i++)
{
a_m_k.push_back(
Tensor<ADataType>(f_host_tensor_descriptor(Ms[i], Ks[i], StrideAs[i], ALayout{})));
......@@ -144,7 +144,7 @@ void profile_grouped_gemm_impl(int do_verification,
gemm_shapes.reserve(group_count);
for(int i = 0; i < group_count; i++)
for(std::size_t i = 0; i < group_count; i++)
{
a_device_buf.emplace_back(
std::make_unique<DeviceMem>(sizeof(ADataType) * a_m_k[i].mDesc.GetElementSpace()));
......@@ -231,10 +231,11 @@ void profile_grouped_gemm_impl(int do_verification,
{
std::string gemm_name = gemm_ptr->GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
float ave_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t flop = 0, num_btype = 0;
for(int i = 0; i < gemm_shapes.size(); i++)
for(std::size_t i = 0; i < gemm_shapes.size(); i++)
{
flop += std::size_t(2) * Ms[i] * Ns[i] * Ks[i];
......@@ -258,7 +259,7 @@ void profile_grouped_gemm_impl(int do_verification,
if(do_verification)
{
for(int i = 0; i < gemm_shapes.size(); i++)
for(std::size_t i = 0; i < gemm_shapes.size(); i++)
{
c_device_buf[i]->FromDevice(c_m_n_device_results[i].mData.data());
......
......@@ -157,7 +157,7 @@ void profile_reduce_impl_impl(bool do_verification,
int init_method,
bool do_log,
bool do_dumpout,
int nrepeat,
bool time_kernel,
const std::vector<size_t>& inLengths,
const std::vector<int>& reduceDims,
float alpha,
......@@ -430,7 +430,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto invoker_ptr = reduce_ptr->MakeInvokerPointer();
float avg_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
float avg_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_bytes =
invariant_total_length * reduce_total_length * sizeof(InDataType) +
......@@ -516,7 +517,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto invoker_ptr = reduce_ptr->MakeInvokerPointer();
float avg_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
float avg_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_bytes =
invariant_total_length * reduce_total_length * sizeof(InDataType) +
......@@ -554,7 +556,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto invoker2_ptr = reduce2_ptr->MakeInvokerPointer();
float avg_time_2 = invoker2_ptr->Run(argument2_ptr.get(), nrepeat);
float avg_time_2 =
invoker2_ptr->Run(argument2_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_bytes_2 =
static_cast<size_t>(inLengths2[0]) * inLengths2[1] * sizeof(AccDataType);
......@@ -625,7 +628,7 @@ void profile_reduce_impl(bool do_verification,
int init_method,
bool do_log,
bool do_dumpout,
int nrepeat,
bool time_kernel,
const std::vector<size_t>& inLengths,
const std::vector<int>& reduceDims,
ReduceTensorOp ReduceOpId,
......@@ -663,7 +666,7 @@ void profile_reduce_impl(bool do_verification,
init_method,
do_log,
do_dumpout,
nrepeat,
time_kernel,
inLengths,
reduceDims,
alpha,
......
......@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[])
printf(" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
exit(1);
}
......@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
printf("arg15: split k into mulitiple batch\n");
exit(1);
......@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
......@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
......@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
StreamControl{nullptr, time_kernel},
N,
K,
C,
......@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
StreamControl{nullptr, time_kernel},
N,
K,
C,
......@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
StreamControl{nullptr, time_kernel},
N,
K,
C,
......@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
StreamControl{nullptr, time_kernel},
N,
K,
C,
......
......@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
......@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
N,
K,
C,
......@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
N,
K,
C,
......
......@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
......@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
......@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
N,
K,
C,
......
......@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
......@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
......@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
N,
K,
C,
......
......@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
......@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const bool time_kernel = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
......@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
N,
K,
C,
......
......@@ -39,40 +39,40 @@ ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[],
// (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
ck::utils::conv::ConvParams params;
params.num_dim_spatial = num_dim_spatial;
params.N = std::stoi(argv[arg_idx++]);
params.K = std::stoi(argv[arg_idx++]);
params.C = std::stoi(argv[arg_idx++]);
params.num_dim_spatial_ = num_dim_spatial;
params.N_ = std::stoi(argv[arg_idx++]);
params.K_ = std::stoi(argv[arg_idx++]);
params.C_ = std::stoi(argv[arg_idx++]);
params.filter_spatial_lengths.resize(num_dim_spatial);
params.filter_spatial_lengths_.resize(num_dim_spatial);
for(int i = 0; i < num_dim_spatial; ++i)
{
params.filter_spatial_lengths[i] = std::stoi(argv[arg_idx++]);
params.filter_spatial_lengths_[i] = std::stoi(argv[arg_idx++]);
}
params.input_spatial_lengths.resize(num_dim_spatial);
params.input_spatial_lengths_.resize(num_dim_spatial);
for(int i = 0; i < num_dim_spatial; ++i)
{
params.input_spatial_lengths[i] = std::stoi(argv[arg_idx++]);
params.input_spatial_lengths_[i] = std::stoi(argv[arg_idx++]);
}
params.conv_filter_strides.resize(num_dim_spatial);
params.conv_filter_strides_.resize(num_dim_spatial);
for(int i = 0; i < num_dim_spatial; ++i)
{
params.conv_filter_strides[i] = std::stoi(argv[arg_idx++]);
params.conv_filter_strides_[i] = std::stoi(argv[arg_idx++]);
}
params.conv_filter_dilations.resize(num_dim_spatial);
params.conv_filter_dilations_.resize(num_dim_spatial);
for(int i = 0; i < num_dim_spatial; ++i)
{
params.conv_filter_dilations[i] = std::stoi(argv[arg_idx++]);
params.conv_filter_dilations_[i] = std::stoi(argv[arg_idx++]);
}
params.input_left_pads.resize(num_dim_spatial);
params.input_left_pads_.resize(num_dim_spatial);
for(int i = 0; i < num_dim_spatial; ++i)
{
params.input_left_pads[i] = std::stoi(argv[arg_idx++]);
params.input_left_pads_[i] = std::stoi(argv[arg_idx++]);
}
params.input_right_pads.resize(num_dim_spatial);
params.input_right_pads_.resize(num_dim_spatial);
for(int i = 0; i < num_dim_spatial; ++i)
{
params.input_right_pads[i] = std::stoi(argv[arg_idx++]);
params.input_right_pads_[i] = std::stoi(argv[arg_idx++]);
}
return params;
......@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg9: time kernel (0=n0, 1=yes)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
return 1;
......@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const bool time_kernel = std::stoi(argv[9]);
ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
......@@ -132,17 +132,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification,
init_method,
do_log,
nrepeat,
params.N,
params.K,
params.C,
params.input_spatial_lengths,
params.filter_spatial_lengths,
time_kernel,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides,
params.conv_filter_dilations,
params.input_left_pads,
params.input_right_pads);
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_);
break;
case 2:
......@@ -157,17 +157,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification,
init_method,
do_log,
nrepeat,
params.N,
params.K,
params.C,
params.input_spatial_lengths,
params.filter_spatial_lengths,
time_kernel,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides,
params.conv_filter_dilations,
params.input_left_pads,
params.input_right_pads);
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_);
break;
case 3:
......@@ -182,17 +182,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification,
init_method,
do_log,
nrepeat,
params.N,
params.K,
params.C,
params.input_spatial_lengths,
params.filter_spatial_lengths,
time_kernel,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides,
params.conv_filter_dilations,
params.input_left_pads,
params.input_right_pads);
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_);
break;
default: break;
......
......@@ -5,7 +5,7 @@
#include <vector>
#include <half.hpp>
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "profile_convnd_fwd.hpp"
......@@ -119,7 +119,7 @@ template <int NDim,
void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
bool do_verification,
bool do_log,
int nrepeat,
bool time_kernel,
int init_method,
ConvLayouts)
{
......@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
reference_conv_fwd_fun);
auto best_conf = run_engine.Profile(
conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(),
nrepeat,
time_kernel,
do_verification,
do_log);
......@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type,
const ck::utils::conv::ConvParams& params,
bool do_verification,
bool do_log,
int nrepeat,
bool time_kernel,
int init_method)
{
switch(data_layout)
......@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
......@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
......@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
......@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
......@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
......@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
......@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
......@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type,
params,
do_verification,
do_log,
nrepeat,
time_kernel,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
......@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
bool do_verification{true};
int init_method{2};
bool do_log{false};
int nrepeat{100};
bool time_kernel{false};
int num_dim_spatial{2};
ConvParams params;
......@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
do_verification = std::stoi(argv[4]);
init_method = std::stoi(argv[5]);
do_log = std::stoi(argv[6]);
nrepeat = std::stoi(argv[7]);
time_kernel = std::stoi(argv[7]);
num_dim_spatial = std::stoi(argv[8]);
}
if(argc >= 10)
......@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
{
case 1:
profile_convnd_instances<1>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
break;
case 2:
profile_convnd_instances<2>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
break;
case 3:
profile_convnd_instances<3>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
break;
default:
throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " +
......
......@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: split k into mulitiple batch\n");
exit(1);
......@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: alpha\n");
printf("arg15: beta\n");
......@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: split k into mulitiple batch\n");
exit(1);
......@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1\n");
printf("arg15: split k into mulitiple batch\n");
exit(1);
......@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
printf("arg14: split k into mulitiple batch\n");
exit(1);
......@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
......@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
M,
N,
K,
......
......@@ -54,8 +54,8 @@ int profile_grouped_gemm(int argc, char* argv[])
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg7: run kernel # of times (>1)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
printf("arg7: time kernel (0=n0, 1=yes)\n");
printf("arg8 to 13: Ms, Ns, Ks, StrideAs, StrideBs, StrideCs (e.g., 256,256 128,128 64,64 "
"64,64 64,64 128,128)\n");
exit(1);
......@@ -66,7 +66,7 @@ int profile_grouped_gemm(int argc, char* argv[])
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const int nrepeat = std::stoi(argv[7]);
const bool time_kernel = std::stoi(argv[7]);
const auto Ms = argToIntArray(argv[8]);
const auto Ns = argToIntArray(argv[9]);
......@@ -86,7 +86,7 @@ int profile_grouped_gemm(int argc, char* argv[])
ck::tensor_layout::gemm::RowMajor>(do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
Ms,
Ns,
Ks,
......@@ -104,7 +104,7 @@ int profile_grouped_gemm(int argc, char* argv[])
ck::tensor_layout::gemm::RowMajor>(do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
Ms,
Ns,
Ks,
......@@ -122,7 +122,7 @@ int profile_grouped_gemm(int argc, char* argv[])
ck::tensor_layout::gemm::RowMajor>(do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
Ms,
Ns,
Ks,
......@@ -140,7 +140,7 @@ int profile_grouped_gemm(int argc, char* argv[])
ck::tensor_layout::gemm::RowMajor>(do_verification,
init_method,
do_log,
nrepeat,
time_kernel,
Ms,
Ns,
Ks,
......
......@@ -144,7 +144,7 @@ class AppArgs
bool do_dumpout = false;
int init_method;
int nrepeat;
bool time_kernel;
bool need_indices = false;
......@@ -186,7 +186,7 @@ class AppArgs
int processArgs(int argc, char* argv[])
{
unsigned int ch;
int ch;
optind++; // to skip the "reduce" module name
......@@ -295,7 +295,7 @@ class AppArgs
throw std::runtime_error("Invalid cmd-line arguments, more argumetns are needed!");
init_method = std::atoi(argv[optind++]);
nrepeat = std::atoi(argv[optind]);
time_kernel = std::atoi(argv[optind]);
if(scales.empty())
{
......@@ -354,7 +354,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -369,7 +369,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -387,7 +387,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -414,7 +414,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -429,7 +429,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -454,7 +454,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -471,7 +471,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......@@ -486,7 +486,7 @@ int profile_reduce(int argc, char* argv[])
args.init_method,
args.do_log,
args.do_dumpout,
args.nrepeat,
args.time_kernel,
args.inLengths,
args.reduceDims,
args.reduceOp,
......
#!/usr/bin/env python3
import os, io
import argparse
def print_to_string(*args, **kwargs):
    """Render ``print(*args, **kwargs)`` into a string instead of a stream.

    Args:
        *args: Positional values forwarded to ``print``.
        **kwargs: Keyword options forwarded to ``print`` (e.g. ``sep``, ``end``).
            ``file`` must not be passed; it is always bound to an internal
            in-memory buffer.

    Returns:
        str: Exactly the text ``print`` wrote, including the ``end`` string
        (a newline unless overridden).
    """
    # The context manager closes the buffer even when print() raises,
    # which the manual close() at the end of the function did not guarantee.
    with io.StringIO() as output:
        print(*args, file=output, **kwargs)
        return output.getvalue()
def parse_args():
    """Parse the command line and resolve the list of log files to read.

    The single positional argument ``filename`` is either a log file or a
    directory. For a directory, every regular file directly inside it whose
    name contains the substring ``log`` is selected.

    Returns:
        argparse.Namespace: The parsed arguments, with an extra ``files``
        attribute holding the list of paths to process.
    """
    parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
    parser.add_argument('filename', type=str,
                        help='Log file to parse or directory containing log files')
    args = parser.parse_args()

    if os.path.isdir(args.filename):
        files = []
        # os.listdir() order is arbitrary; sort so repeated runs process the
        # logs in the same order.
        for name in sorted(os.listdir(args.filename)):
            if 'log' not in name:
                continue
            path = os.path.join(args.filename, name)
            # Skip sub-directories such as "logs/": they match the name
            # filter but cannot be opened as a log file.
            if os.path.isfile(path):
                files.append(path)
    else:
        files = [args.filename]

    args.files = files
    return args
def main():
    """Collect the 'Best Perf' lines from the selected logs and print them.

    Every line containing the text ``Best Perf`` is split on whitespace. The
    tokens from index 8 onward are concatenated without a separator and
    formatted, via ``print_to_string``, together with the token at index 4.
    NOTE(review): presumably index 4 is the performance figure and the tail
    is the kernel description -- confirm against the profiler's log format.

    A matching line with fewer than five tokens raises ``IndexError``.

    Returns:
        int: Always 0 for now; the baseline comparison that would yield 1 is
        still a TODO (see the placeholder comments below).
    """
    args = parse_args()
    results = []
    #parse results
    glue = ""
    for filename in args.files:
        # 'with' closes each log as soon as it has been scanned instead of
        # leaving the handle open until garbage collection.
        with open(filename) as log_file:
            for line in log_file:
                if 'Best Perf' in line:
                    lst = line.split()
                    results.append(print_to_string(glue.join(lst[8:]), lst[4]))
    #sort results
    #read baseline results for the latest develop branch
    #write new results to the db
    #compare the results to the baseline
    #return 0 if performance criteria met, otherwise return 1
    print(results)
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status so a caller can
    # act on the 0/1 result; previously the value was discarded.
    raise SystemExit(main())
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment