"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "c77ae65d40b1316dac02c4decf02d8517c840be2"
Commit e9ac4855 authored by Chao Liu
Browse files

tune

parent b5b4fd28
...@@ -357,7 +357,7 @@ int main() ...@@ -357,7 +357,7 @@ int main()
constexpr unsigned C = 1; constexpr unsigned C = 1;
constexpr unsigned HI = 34; constexpr unsigned HI = 34;
constexpr unsigned WI = 34; constexpr unsigned WI = 34;
constexpr unsigned K = 4; constexpr unsigned K = 1;
constexpr unsigned S = 3; constexpr unsigned S = 3;
constexpr unsigned R = 3; constexpr unsigned R = 3;
#elif 1 #elif 1
......
...@@ -67,29 +67,29 @@ void device_implicit_gemm_convolution_2_cnhw_srck_knhw(InDesc, ...@@ -67,29 +67,29 @@ void device_implicit_gemm_convolution_2_cnhw_srck_knhw(InDesc,
Tensor<T> out_knhw(make_TensorDescriptor(out_knhw_desc)); Tensor<T> out_knhw(make_TensorDescriptor(out_knhw_desc));
#if 0 #if 0
constexpr unsigned BPerBlock = 128; constexpr unsigned BPerBlock = 256;
constexpr unsigned KPerBlock = 4; constexpr unsigned KPerBlock = 1;
constexpr unsigned CPerBlock = 1; constexpr unsigned CPerBlock = 1;
constexpr unsigned BPerThread = 4; constexpr unsigned BPerThread = 8;
constexpr unsigned KPerThread = 1; constexpr unsigned KPerThread = 1;
constexpr unsigned CPerThread = 1; constexpr unsigned CPerThread = 1;
constexpr unsigned ThreadPerClusterRow = 4; constexpr unsigned ThreadPerClusterRow = 1;
constexpr unsigned ThreadPerClusterColumn = 16; constexpr unsigned ThreadPerClusterColumn = 4;
constexpr unsigned BlockSize = 128; constexpr unsigned BlockSize = 32;
#elif 1 #elif 1
constexpr unsigned BPerBlock = 128; constexpr unsigned BPerBlock = 128;
constexpr unsigned KPerBlock = 64; constexpr unsigned KPerBlock = 64;
constexpr unsigned CPerBlock = 2; constexpr unsigned CPerBlock = 2;
constexpr unsigned BPerThread = 4; constexpr unsigned BPerThread = 8;
constexpr unsigned KPerThread = 16; constexpr unsigned KPerThread = 8;
constexpr unsigned CPerThread = 1; constexpr unsigned CPerThread = 1;
constexpr unsigned ThreadPerClusterRow = 4; constexpr unsigned ThreadPerClusterRow = 4;
constexpr unsigned ThreadPerClusterColumn = 16; constexpr unsigned ThreadPerClusterColumn = 4;
constexpr unsigned BlockSize = 128; constexpr unsigned BlockSize = 128;
#endif #endif
......
...@@ -388,9 +388,9 @@ struct blockwise_gemm_block_a_block_b_thread_c ...@@ -388,9 +388,9 @@ struct blockwise_gemm_block_a_block_b_thread_c
const unsigned thread_work_cluster_id = const unsigned thread_work_cluster_id =
thread_id - cluster_work_block_id * (MThreadPerCluster * NThreadPerCluster); thread_id - cluster_work_block_id * (MThreadPerCluster * NThreadPerCluster);
const unsigned m_cluster_work_block_id = cluster_work_block_id / NThreadPerCluster; const unsigned m_cluster_work_block_id = cluster_work_block_id / NClusterWork;
const unsigned n_cluster_work_block_id = const unsigned n_cluster_work_block_id =
cluster_work_block_id - m_cluster_work_block_id * NThreadPerCluster; cluster_work_block_id - m_cluster_work_block_id * NClusterWork;
const unsigned m_thread_work_cluster_id = const unsigned m_thread_work_cluster_id =
thread_work_cluster_id / NThreadPerCluster; thread_work_cluster_id / NThreadPerCluster;
...@@ -401,12 +401,12 @@ struct blockwise_gemm_block_a_block_b_thread_c ...@@ -401,12 +401,12 @@ struct blockwise_gemm_block_a_block_b_thread_c
if(get_block_1d_id() == 0) if(get_block_1d_id() == 0)
{ {
printf("%u %u, \t" printf("%u %u, \t"
//"MClusterWork %u MThreadPerCluster %u NClusterWork %u NThreadPerCluster %u \t" "MClusterWork %u MThreadPerCluster %u NClusterWork %u NThreadPerCluster %u \t"
"m_cluster_work_block_id %u n_cluster_work_block_id %u \t" "m_cluster_work_block_id %u n_cluster_work_block_id %u \t"
"m_thread_work_cluster_id %u n_thread_work_cluster_id %u \t" "m_thread_work_cluster_id %u n_thread_work_cluster_id %u \t"
"\n", "\n",
get_block_1d_id(), get_thread_local_1d_id(), get_block_1d_id(), get_thread_local_1d_id(),
//MClusterWork, MThreadPerCluster, NClusterWork, NThreadPerCluster, MClusterWork, MThreadPerCluster, NClusterWork, NThreadPerCluster,
m_cluster_work_block_id, n_cluster_work_block_id, m_cluster_work_block_id, n_cluster_work_block_id,
m_thread_work_cluster_id, n_thread_work_cluster_id); m_thread_work_cluster_id, n_thread_work_cluster_id);
} }
......
...@@ -239,10 +239,13 @@ gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw(InGlobalDesc, ...@@ -239,10 +239,13 @@ gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw(InGlobalDesc,
p_out_thread[out_kb_thread_desc.Get1dIndex(k, b)]); p_out_thread[out_kb_thread_desc.Get1dIndex(k, b)]);
} }
#endif #endif
if(k_data < K && n_data < N && h_data < Ho && w_data < Wo) if(n_data < N && h_data < Ho && w_data < Wo)
{ {
#if 1
p_out_global[out_knhw_global_desc.Get1dIndex(k_data, n_data, h_data, w_data)] = p_out_global[out_knhw_global_desc.Get1dIndex(k_data, n_data, h_data, w_data)] =
p_out_thread[out_kb_thread_desc.Get1dIndex(k, b)]; p_out_thread[out_kb_thread_desc.Get1dIndex(k, b)];
#endif
#if 0 #if 0
if(get_block_1d_id() == 0) if(get_block_1d_id() == 0)
{ {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment