Commit 3d34cf56 authored by Jing Zhang
Browse files

repeat running

parent 7e87e0b3
...@@ -149,18 +149,30 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw( ...@@ -149,18 +149,30 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
BThreadTransferSrcScalarPerVector_E2, BThreadTransferSrcScalarPerVector_E2,
CThreadTransferDstScalarPerVector_K>{}; CThreadTransferDstScalarPerVector_K>{};
const auto ave_time = for(int i = 0; i < 5; i++)
conv_driver.Run(wei_k_c0_y_x_c1_desc, {
in_n_c0_hi_wi_c1_desc,
out_n_k0_ho_wo_k1_desc, const auto ave_time =
conv_strides, conv_driver.Run(wei_k_c0_y_x_c1_desc,
conv_dilations, in_n_c0_hi_wi_c1_desc,
in_left_pads, out_n_k0_ho_wo_k1_desc,
in_right_pads, conv_strides,
static_cast<TInWei*>(wei_k_c0_y_x_c1_device_buf.GetDeviceBuffer()), conv_dilations,
static_cast<TInWei*>(in_n_c0_hi_wi_c1_device_buf.GetDeviceBuffer()), in_left_pads,
static_cast<TOut*>(out_n_k0_ho_wo_k1_device_buf.GetDeviceBuffer()), in_right_pads,
nrepeat); static_cast<TInWei*>(wei_k_c0_y_x_c1_device_buf.GetDeviceBuffer()),
static_cast<TInWei*>(in_n_c0_hi_wi_c1_device_buf.GetDeviceBuffer()),
static_cast<TOut*>(out_n_k0_ho_wo_k1_device_buf.GetDeviceBuffer()),
nrepeat);
{
float perf = static_cast<float>(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
(std::size_t(1000) * 1000 * 1000) / ave_time;
std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s"
<< std::endl;
}
}
out_n_k0_ho_wo_k1_device_buf.FromDevice(out_n_k0_ho_wo_k1.mData.data()); out_n_k0_ho_wo_k1_device_buf.FromDevice(out_n_k0_ho_wo_k1.mData.data());
...@@ -169,11 +181,4 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw( ...@@ -169,11 +181,4 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
}; };
make_ParallelTensorFunctor(f_nk0hwk1_to_nkhw, N, K, Ho, Wo)(); make_ParallelTensorFunctor(f_nk0hwk1_to_nkhw, N, K, Ho, Wo)();
{
float perf = static_cast<float>(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
(std::size_t(1000) * 1000 * 1000) / ave_time;
std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s" << std::endl;
}
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment