Commit edc89778 authored by Chao Liu
Browse files

update flops calculation

parent 5696c81f
...@@ -423,7 +423,7 @@ void device_implicit_gemm_convolution_1_chwn_cyxk_khwn(InDesc, ...@@ -423,7 +423,7 @@ void device_implicit_gemm_convolution_1_chwn_cyxk_khwn(InDesc,
printf("Elapsed time : %f ms, %f TFlop/s\n", printf("Elapsed time : %f ms, %f TFlop/s\n",
time, time,
(float)calculate_convolution_flops(InDesc{}, WeiDesc{}, OutDesc{}) / (float)calculate_convolution_flops(InDesc{}, WeiDesc{}, OutDesc{}) /
(std::size_t(1024) * 1024 * 1024 * 1024) / (time / 1000)); (std::size_t(1000) * 1000 * 1000) / time);
usleep(std::min(time * 1000, float(10000))); usleep(std::min(time * 1000, float(10000)));
} }
......
...@@ -314,7 +314,7 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc, ...@@ -314,7 +314,7 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc,
printf("Elapsed time : %f ms, %f TFlop/s\n", printf("Elapsed time : %f ms, %f TFlop/s\n",
time, time,
(float)calculate_convolution_flops(InDesc{}, WeiDesc{}, OutDesc{}) / (float)calculate_convolution_flops(InDesc{}, WeiDesc{}, OutDesc{}) /
(std::size_t(1024) * 1024 * 1024 * 1024) / (time / 1000)); (std::size_t(1000) * 1000 * 1000) / time);
usleep(std::min(time * 1000, float(10000))); usleep(std::min(time * 1000, float(10000)));
} }
......
...@@ -259,7 +259,6 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2 ...@@ -259,7 +259,6 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2
c_thread_mtx, c_thread_mtx,
False, False,
p_c_thread + ib * ThreadMatrixStrideC); p_c_thread + ib * ThreadMatrixStrideC);
} }
} }
} }
......
...@@ -269,9 +269,9 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer ...@@ -269,9 +269,9 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer
#elif 0 #elif 0
blockwise_gemm.Run_asm blockwise_gemm.Run_asm
#endif #endif
(p_wei_block_now + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), (p_wei_block_now + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0),
p_in_block_now + y * Wi + x, p_in_block_now + y * Wi + x,
p_out_thread); p_out_thread);
} }
} }
...@@ -310,9 +310,9 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer ...@@ -310,9 +310,9 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer
#elif 0 #elif 0
blockwise_gemm.Run_asm blockwise_gemm.Run_asm
#endif #endif
(p_wei_block_double + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), (p_wei_block_double + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0),
p_in_block_double + y * Wi + x, p_in_block_double + y * Wi + x,
p_out_thread); p_out_thread);
} }
} }
...@@ -335,10 +335,10 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer ...@@ -335,10 +335,10 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer
#elif 0 #elif 0
blockwise_gemm.Run_asm blockwise_gemm.Run_asm
#endif #endif
(p_wei_block_double + wei_block_space + (p_wei_block_double + wei_block_space +
wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), wei_cyxk_block_desc.Get1dIndex(0, y, x, 0),
p_in_block_double + in_block_space + y * Wi + x, p_in_block_double + in_block_space + y * Wi + x,
p_out_thread); p_out_thread);
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment