Commit b8bb1480 authored by Jing Zhang's avatar Jing Zhang
Browse files

3x3 test

parent 40b7d356
......@@ -271,6 +271,14 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
index_t nrepeat = 100;
std::cout << "conv_v5r1__NCHWc" << K1 << "_n" << N << "c" << C << "h" << Hi << "w" << Wi
<< "-k" << K << "c" << C << "y" << Y << "x" << X << "-u" << conv_strides[I0]
<< "v" << conv_strides[I1] << "l" << conv_dilations[I0] << "j"
<< conv_dilations[I1] << "q" << in_left_pads[I0] << "p" << in_right_pads[I0]
<< std::endl;
std::cout << "GridSize = " << GridSize << " BlockSize = " << BlockSize << std::endl;
for(index_t i = 0; i < 5; ++i)
{
std::cout << "Start running " << nrepeat << " times..." << std::endl;
......
......@@ -103,13 +103,47 @@ int main(int argc, char* argv[])
const bool do_log = atoi(argv[5]);
const int nrepeat = atoi(argv[6]);
#if 0
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t Hi = 1080;
constexpr index_t Wi = 1920;
constexpr index_t K = 16;
constexpr index_t Y = 3;
constexpr index_t X = 3;
#elif 0
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t Hi = 544;
constexpr index_t Hi = 540;
constexpr index_t Wi = 960;
constexpr index_t K = 16;
constexpr index_t Y = 3;
constexpr index_t X = 3;
#elif 0
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t Hi = 480;
constexpr index_t Wi = 270;
constexpr index_t K = 16;
constexpr index_t Y = 3;
constexpr index_t X = 3;
#elif 1
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t Hi = 240;
constexpr index_t Wi = 135;
constexpr index_t K = 16;
constexpr index_t Y = 3;
constexpr index_t X = 3;
#elif 1
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t Hi = 1080;
constexpr index_t Wi = 1920;
constexpr index_t K = 16;
constexpr index_t Y = 1;
constexpr index_t X = 1;
#endif
const index_t conv_stride_h = 1;
const index_t conv_stride_w = 1;
......
......@@ -92,7 +92,6 @@ void device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
const auto out_n_k0_ho_wo_k1_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K0, Ho, Wo, K1));
#if 1
// cdata = 64, BlockSize = 64, 16x8x32x4
constexpr index_t BlockSize = 64;
......@@ -114,34 +113,9 @@ void device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
constexpr index_t BThreadTransferSrcScalarPerVector_W = 1;
constexpr index_t CThreadTransferDstScalarPerVector_W = 8;
static_assert(KPerThread % CThreadTransferDstScalarPerVector_W == 0, "");
#else
constexpr index_t BlockSize = 64;
constexpr index_t KPerBlock = 16;
constexpr index_t HoPerBlock = 8;
constexpr index_t WoPerBlock = 32;
constexpr index_t EPerBlock = 1;
constexpr index_t KPerThread = 16;
constexpr index_t HoPerThread = 2;
constexpr index_t WoPerThread = 2;
constexpr index_t EPerThread = EPerBlock;
using ABlockTransferThreadSliceLengths_E_K = Sequence<9, 1>;
using ABlockTransferThreadClusterLengths_E_K = Sequence<EPerBlock, 16>;
constexpr index_t ABlockTransferSrcScalarPerVector_E = 1;
constexpr index_t ABlockTransferDstScalarPerVector_K = 1;
constexpr index_t BThreadTransferSrcScalarPerVector_W = 1;
constexpr index_t CThreadTransferDstScalarPerVector_W = K1;
static_assert(KPerThread % CThreadTransferDstScalarPerVector_W == 0, "");
#endif
constexpr auto conv_driver =
#if 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment