"test/normalization/test_groupnorm_fp32.cpp" did not exist on "f584ab0c545ade05ae793a8b36fa282d47d0f698"
Commit 20423a35 authored by Chao Liu
Browse files

tune direct

parent 1eafc9c1
......@@ -325,57 +325,41 @@ int main()
#if 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 34;
constexpr unsigned WI = 34;
constexpr unsigned HI = 4;
constexpr unsigned WI = 4;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 1
constexpr unsigned N = 64;
constexpr unsigned C = 256;
#elif 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 34;
constexpr unsigned WI = 34;
constexpr unsigned K = 64;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 72;
constexpr unsigned C = 288;
constexpr unsigned HI = 38;
constexpr unsigned WI = 38;
constexpr unsigned K = 72;
constexpr unsigned N = 64;
constexpr unsigned C = 256;
constexpr unsigned HI = 34;
constexpr unsigned WI = 34;
constexpr unsigned K = 64;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 18;
constexpr unsigned WI = 18;
constexpr unsigned K = 1;
#elif 1
constexpr unsigned N = 64;
constexpr unsigned C = 64;
constexpr unsigned HI = 56;
constexpr unsigned WI = 56;
constexpr unsigned K = 64;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 4;
constexpr unsigned WI = 4;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 2;
constexpr unsigned C = 3;
constexpr unsigned HI = 130;
constexpr unsigned WI = 130;
constexpr unsigned K = 5;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 3;
constexpr unsigned C = 16;
constexpr unsigned HI = 130;
constexpr unsigned WI = 130;
constexpr unsigned K = 4;
constexpr unsigned N = 64;
constexpr unsigned C = 64;
constexpr unsigned HI = 66;
constexpr unsigned WI = 66;
constexpr unsigned K = 64;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#endif
......@@ -397,7 +381,7 @@ int main()
std::size_t num_thread = std::thread::hardware_concurrency();
in.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
#elif 0
#elif 1
std::size_t num_thread = std::thread::hardware_concurrency();
in.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
wei.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
......@@ -412,7 +396,7 @@ int main()
#endif
}
#if 0
#if 1
host_winograd_3x3_convolution(in, wei, out_host);
check_error(out_host, out_device);
#elif 0
......
......@@ -21,9 +21,11 @@ void device_direct_convolution_1(
constexpr auto I2 = Number<2>{};
constexpr auto I3 = Number<3>{};
constexpr auto in_desc = InDesc{};
constexpr auto wei_desc = WeiDesc{};
constexpr auto out_desc = OutDesc{};
constexpr auto in_desc = InDesc{};
constexpr auto wei_desc = WeiDesc{};
constexpr auto out_desc = OutDesc{};
#if 0
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
......@@ -37,6 +39,21 @@ void device_direct_convolution_1(
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 128;
#elif 1
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
constexpr unsigned KPerBlock = 16;
constexpr unsigned CPerBlock = 2;
constexpr unsigned YPerBlock = 2;
constexpr unsigned XPerBlock = 27;
constexpr unsigned NPerThread = 2;
constexpr unsigned KPerThread = 4;
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 216;
#endif
constexpr unsigned GridSize = (out_desc.GetLength(I0) / NPerBlock) *
(out_desc.GetLength(I1) / KPerBlock) *
......
......@@ -21,9 +21,11 @@ void device_direct_convolution_2(
constexpr auto I2 = Number<2>{};
constexpr auto I3 = Number<3>{};
constexpr auto in_desc = InDesc{};
constexpr auto wei_desc = WeiDesc{};
constexpr auto out_desc = OutDesc{};
constexpr auto in_desc = InDesc{};
constexpr auto wei_desc = WeiDesc{};
constexpr auto out_desc = OutDesc{};
#if 0
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
......@@ -37,6 +39,35 @@ void device_direct_convolution_2(
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 128;
#elif 1
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
constexpr unsigned KPerBlock = 32;
constexpr unsigned CPerBlock = 4;
constexpr unsigned YPerBlock = 1;
constexpr unsigned XPerBlock = 27;
constexpr unsigned NPerThread = 2;
constexpr unsigned KPerThread = 4;
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 216;
#elif 1
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
constexpr unsigned KPerBlock = 32;
constexpr unsigned CPerBlock = 4;
constexpr unsigned YPerBlock = 1;
constexpr unsigned XPerBlock = 32;
constexpr unsigned NPerThread = 2;
constexpr unsigned KPerThread = 4;
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 256;
#endif
constexpr unsigned GridSize = (out_desc.GetLength(I0) / NPerBlock) *
(out_desc.GetLength(I1) / KPerBlock) *
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment