Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c9a8e558
Commit
c9a8e558
authored
Jul 20, 2019
by
Chao Liu
Browse files
adding tensor_view
parent
8669e242
Changes
56
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
105 additions
and
51 deletions
+105
-51
composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
...gorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
+1
-1
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
...ridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
...ridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
...ridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
...n_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
...ridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
...n_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
.../gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
...ion_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
+2
-2
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
.../gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
+3
-3
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
...ion_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
+3
-3
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
...ridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
+3
-3
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
...n_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
+3
-3
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer.hpp
...n_implicit_gemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer.hpp
+3
-9
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer.hpp
...n_implicit_gemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer.hpp
+68
-8
composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
...idwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
+1
-1
composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
...ise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
+2
-2
composable_kernel/include/tensor/constant_matrix_descriptor.hpp
...able_kernel/include/tensor/constant_matrix_descriptor.hpp
+1
-1
composable_kernel/include/tensor/constant_merged_tensor_descriptor.hpp
...rnel/include/tensor/constant_merged_tensor_descriptor.hpp
+1
-1
composable_kernel/include/tensor/constant_tensor_descriptor.hpp
...able_kernel/include/tensor/constant_tensor_descriptor.hpp
+0
-0
No files found.
composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
View file @
c9a8e558
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
View file @
c9a8e558
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
View file @
c9a8e558
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R2_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R2_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
...
@@ -181,12 +181,6 @@ struct GridwiseConvolutionImplicitGemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -181,12 +181,6 @@ struct GridwiseConvolutionImplicitGemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer
InBlockCopyDataPerAccess_W2
>
({
0
,
0
,
0
,
0
,
b_block_data_on_global
,
0
,
0
,
0
},
InBlockCopyDataPerAccess_W2
>
({
0
,
0
,
0
,
0
,
b_block_data_on_global
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
});
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
});
#if 0
{
printf("id (%d %d), in offset: %d %d\n", get_block_1d_id(), get_thread_local_1d_id(), blockwise_in_copy.mThreadSrcOffset, blockwise_in_copy.mThreadDstOffset);
}
#endif
// weight tensor
// weight tensor
// tensor descriptor in device memory, src of blockwise copy
// tensor descriptor in device memory, src of blockwise copy
constexpr
auto
wei_e_k_global_desc
=
constexpr
auto
wei_e_k_global_desc
=
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer.hpp
View file @
c9a8e558
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R3_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R3_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
...
@@ -296,7 +296,16 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -296,7 +296,16 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
// LDS double buffer: preload data into LDS
// LDS double buffer: preload data into LDS
{
{
#if 0
blockwise_in_copy.Run(p_in_global, p_in_block_double);
blockwise_in_copy.Run(p_in_global, p_in_block_double);
#endif
#if 1
if
(
get_block_1d_id
()
==
0
&&
get_thread_local_1d_id
()
==
0
)
{
printf
(
"blockwise_wei_copy.Run
\n
"
);
}
#endif
blockwise_wei_copy
.
Run
(
p_wei_global
,
p_wei_block_double
);
blockwise_wei_copy
.
Run
(
p_wei_global
,
p_wei_block_double
);
}
}
...
@@ -327,12 +336,36 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -327,12 +336,36 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
__syncthreads
();
__syncthreads
();
// LDS double buffer: load next data from device mem
#if 0
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
for(index_t i = 0; i < wei_e_k_block_desc.GetLengths()[0]; ++i)
{
for(index_t j = 0; j < wei_e_k_block_desc.GetLengths()[1]; ++j)
{
printf("%d %d %f, ", i, j, p_wei_block_now[wei_e_k_block_desc.GetOffsetFromMultiIndex(i, j)]);
}
}
printf("\n");
}
#endif
// LDS double buffer: load next data from device mem
#if 0
blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard);
blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard);
#endif
#if 1
if
(
get_block_1d_id
()
==
0
&&
get_thread_local_1d_id
()
==
0
)
{
printf
(
"blockwise_wei_copy.RunLoad
\n
"
);
}
#endif
blockwise_wei_copy
.
RunLoadRegisterClipboard
(
p_wei_block_on_global
,
blockwise_wei_copy
.
RunLoadRegisterClipboard
(
p_wei_block_on_global
,
p_wei_register_clipboard
);
p_wei_register_clipboard
);
#if
1
#if
0
if(get_block_1d_id() == 0)
if(get_block_1d_id() == 0)
{
{
printf("tid (%d %d), %f %f %f %f\n",
printf("tid (%d %d), %f %f %f %f\n",
...
@@ -348,9 +381,18 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -348,9 +381,18 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
// LDS double buffer: GEMM on current data
// LDS double buffer: GEMM on current data
blockwise_gemm
.
Run
(
p_wei_block_now
,
p_in_block_now
,
p_out_thread
);
blockwise_gemm
.
Run
(
p_wei_block_now
,
p_in_block_now
,
p_out_thread
);
// LDS double buffer: store next data to LDS
// LDS double buffer: store next data to LDS
#if 0
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
p_in_block_next);
p_in_block_next);
#endif
#if 1
if
(
get_block_1d_id
()
==
0
&&
get_thread_local_1d_id
()
==
0
)
{
printf
(
"blockwise_wei_copy.RunStore
\n
"
);
}
#endif
blockwise_wei_copy
.
RunStoreRegisterClipboard
(
p_wei_register_clipboard
,
blockwise_wei_copy
.
RunStoreRegisterClipboard
(
p_wei_register_clipboard
,
p_wei_block_next
);
p_wei_block_next
);
}
}
...
@@ -367,17 +409,33 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -367,17 +409,33 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
__syncthreads
();
__syncthreads
();
// LDS double buffer: load next data from device mem
// LDS double buffer: load next data from device mem
#if 0
blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard);
blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard);
#endif
#if 1
if
(
get_block_1d_id
()
==
0
&&
get_thread_local_1d_id
()
==
0
)
{
printf
(
"blockwise_wei_copy.RunLoad
\n
"
);
}
#endif
blockwise_wei_copy
.
RunLoadRegisterClipboard
(
p_wei_block_on_global
,
blockwise_wei_copy
.
RunLoadRegisterClipboard
(
p_wei_block_on_global
,
p_wei_register_clipboard
);
p_wei_register_clipboard
);
// LDS double buffer: GEMM on current data
// LDS double buffer: GEMM on current data
blockwise_gemm
.
Run
(
p_wei_block_double
,
p_in_block_double
,
p_out_thread
);
blockwise_gemm
.
Run
(
p_wei_block_double
,
p_in_block_double
,
p_out_thread
);
// LDS double buffer: store next data to LDS
// LDS double buffer: store next data to LDS
#if 0
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
p_in_block_double + in_block_space);
p_in_block_double + in_block_space);
#endif
#if 1
if
(
get_block_1d_id
()
==
0
&&
get_thread_local_1d_id
()
==
0
)
{
printf
(
"blockwise_wei_copy.RunStore
\n
"
);
}
#endif
blockwise_wei_copy
.
RunStoreRegisterClipboard
(
p_wei_register_clipboard
,
blockwise_wei_copy
.
RunStoreRegisterClipboard
(
p_wei_register_clipboard
,
p_wei_block_double
+
wei_block_space
);
p_wei_block_double
+
wei_block_space
);
...
@@ -444,6 +502,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -444,6 +502,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
out_k_n1_ho1_wo1_b_n2_ho2_wo2_global_merged_desc
.
GetOffsetFromMultiIndex
(
out_k_n1_ho1_wo1_b_n2_ho2_wo2_global_merged_desc
.
GetOffsetFromMultiIndex
(
k_thread_data_on_global
,
0
,
0
,
0
,
b_thread_data_on_global
,
0
,
0
,
0
);
k_thread_data_on_global
,
0
,
0
,
0
,
b_thread_data_on_global
,
0
,
0
,
0
);
#if 0
threadwise_generic_tensor_slice_copy_v1(
threadwise_generic_tensor_slice_copy_v1(
out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc,
out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc,
p_out_thread,
p_out_thread,
...
@@ -454,6 +513,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -454,6 +513,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc.GetLengths(),
out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc.GetLengths(),
arithmetic_sequence_gen<0, 12, 1>::type{},
arithmetic_sequence_gen<0, 12, 1>::type{},
Number<1>{});
Number<1>{});
#endif
}
}
}
}
};
};
...
...
composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
View file @
c9a8e558
#pragma once
#pragma once
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_direct_convolution.hpp"
#include "blockwise_direct_convolution.hpp"
...
...
composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
View file @
c9a8e558
#pragma once
#pragma once
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
...
...
composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp → composable_kernel/include/tensor/constant_matrix_descriptor.hpp
View file @
c9a8e558
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
namespace
ck
{
namespace
ck
{
...
...
composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp → composable_kernel/include/tensor/constant_merged_tensor_descriptor.hpp
View file @
c9a8e558
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#include "common_header.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
namespace
ck
{
namespace
ck
{
...
...
composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp → composable_kernel/include/tensor/constant_tensor_descriptor.hpp
View file @
c9a8e558
File moved
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment