Commit 8c385cf5 authored by Chao Liu's avatar Chao Liu
Browse files

implicit gemm v3 added lds double buffer

parent e17b495d
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "device.hpp" #include "device.hpp"
#include "gridwise_convolution_wrapper.hip.hpp" #include "gridwise_convolution_wrapper.hip.hpp"
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp" #include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp"
#include "gridwise_convolution_implicit_gemm_v3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp"
template <class T, class InDesc, class WeiDesc, class OutDesc> template <class T, class InDesc, class WeiDesc, class OutDesc>
void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc, void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc,
...@@ -98,8 +99,10 @@ void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc, ...@@ -98,8 +99,10 @@ void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc,
for(index_t i = 0; i < nrepeat; ++i) for(index_t i = 0; i < nrepeat; ++i)
{ {
constexpr auto gridwise_conv = constexpr auto gridwise_conv =
#if 1 #if 0
GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
#else
GridwiseConvolutionImplicitGemm_v3_lds_double_buffer_nchw_cyxk_nkhw
#endif #endif
<GridSize, <GridSize,
BlockSize, BlockSize,
......
...@@ -422,7 +422,7 @@ int main(int argc, char* argv[]) ...@@ -422,7 +422,7 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0; constexpr index_t HPad = 0;
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 1 #elif 0
// 3x3, 34x34 // 3x3, 34x34
constexpr index_t N = 64; constexpr index_t N = 64;
constexpr index_t C = 256; constexpr index_t C = 256;
...@@ -446,7 +446,7 @@ int main(int argc, char* argv[]) ...@@ -446,7 +446,7 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0; constexpr index_t HPad = 0;
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 1
// 3x3 filter, 28x28 image // 3x3 filter, 28x28 image
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 256; constexpr index_t C = 256;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment