Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
bd811e2c
Commit
bd811e2c
authored
Jan 24, 2019
by
Chao Liu
Browse files
refactor
parent
c39c573e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
10 deletions
+15
-10
driver/conv.cu
driver/conv.cu
+3
-3
src/include/gridwise_implicit_gemm_convolution_1_nchw_srck_nkhw.cuh
...e/gridwise_implicit_gemm_convolution_1_nchw_srck_nkhw.cuh
+12
-7
No files found.
driver/conv.cu
View file @
bd811e2c
...
...
@@ -9,7 +9,7 @@
#include "device_direct_convolution_1.cuh"
#include "device_direct_convolution_2.cuh"
//#include "device_implicit_gemm_convolution_1_nchw_kcsr.cuh"
//
#include "device_implicit_gemm_convolution_1_nchw_srck_nkhw.cuh"
#include "device_implicit_gemm_convolution_1_nchw_srck_nkhw.cuh"
#include "device_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh"
//#include "device_winograd_convolution.cuh"
...
...
@@ -418,9 +418,9 @@ int main()
device_direct_convolution_2
#elif 0
device_implicit_gemm_convolution_1_nchw_kcsr
#elif 0
device_implicit_gemm_convolution_1_nchw_srck_nkhw
#elif 1
device_implicit_gemm_convolution_1_nchw_srck_nkhw
#elif 0
device_implicit_gemm_convolution_2_cnhw_srck_knhw
#elif 0
device_winograd_convolution
...
...
src/include/gridwise_implicit_gemm_convolution_1_nchw_srck_nkhw.cuh
View file @
bd811e2c
...
...
@@ -103,6 +103,15 @@ gridwise_implicit_gemm_convolution_1_nchw_srck_nkhw(InGlobalDesc,
}
#endif
// blockwise copy
// wei: format is [S,R,C,K], no conversion needed
constexpr
auto
blockwise_wei_copy
=
blockwise_4d_tensor_copy_1
<
BlockSize
,
Float
,
decltype
(
wei_srck_global_desc
),
decltype
(
wei_srck_block_desc
),
decltype
(
wei_srck_block_desc
.
GetLengths
())
>
{};
// a series of blockwise batched GEMM
// C_matrix += transpose(A_matrix) * B_matrix
// A_matrix and B_matrix saved in LDS, C_matrix saved in register
...
...
@@ -171,13 +180,9 @@ gridwise_implicit_gemm_convolution_1_nchw_srck_nkhw(InGlobalDesc,
#if 1
// weight: global mem to LDS,
// format is [S,R,C,K], no conversion needed
blockwise_4d_tensor_copy
<
BlockSize
>
(
wei_srck_global_desc
,
p_wei_global
+
wei_srck_global_desc
.
Get1dIndex
(
0
,
0
,
c_block_data_begin
,
k_block_data_begin
),
wei_srck_block_desc
,
p_wei_block
,
wei_srck_block_desc
.
GetLengths
());
blockwise_wei_copy
.
run
(
p_wei_global
+
wei_srck_global_desc
.
Get1dIndex
(
0
,
0
,
c_block_data_begin
,
k_block_data_begin
),
p_wei_block
);
#endif
__syncthreads
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment