Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ff7a6219
Commit
ff7a6219
authored
Jan 16, 2019
by
Chao Liu
Browse files
refactor
parent
89ee2597
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 12 additions and 26 deletions
+12
-26
driver/conv.cu
driver/conv.cu
+10
-18
driver/device_implicit_gemm_convolution.cuh
driver/device_implicit_gemm_convolution.cuh
+2
-2
src/include/gridwise_implicit_gemm_convolution_nchw_srck.cuh
src/include/gridwise_implicit_gemm_convolution_nchw_srck.cuh
+0
-6
No files found.
driver/conv.cu
View file @
ff7a6219
...
...
@@ -336,14 +336,6 @@ void check_error(const Tensor<T>& ref, const Tensor<T>& result)
int
main
()
{
#if 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 4;
constexpr unsigned WI = 4;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 34;
...
...
@@ -369,12 +361,12 @@ int main()
constexpr
unsigned
R
=
3
;
#elif 0
constexpr
unsigned
N
=
64
;
constexpr
unsigned
C
=
64
;
constexpr
unsigned
HI
=
66
;
constexpr
unsigned
WI
=
66
;
constexpr
unsigned
C
=
256
;
constexpr
unsigned
HI
=
36
;
constexpr
unsigned
WI
=
36
;
constexpr
unsigned
K
=
64
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
constexpr
unsigned
S
=
5
;
constexpr
unsigned
R
=
5
;
#endif
auto
in_nchw_desc
=
make_ConstantTensorDescriptor
(
Sequence
<
N
,
C
,
HI
,
WI
>
{});
...
...
driver/device_implicit_gemm_convolution.cuh
View file @
ff7a6219
...
...
@@ -52,7 +52,7 @@ void device_implicit_gemm_convolution(
constexpr
unsigned
WoPerThread
=
2
;
constexpr
unsigned
BlockSize
=
128
;
#elif 1
#elif 0
constexpr
unsigned
NPerBlock
=
2
;
constexpr
unsigned
KPerBlock
=
64
;
constexpr
unsigned
CPerBlock
=
4
;
...
...
@@ -60,7 +60,7 @@ void device_implicit_gemm_convolution(
constexpr
unsigned
WoPerBlock
=
32
;
constexpr
unsigned
KPerThread
=
4
;
constexpr
unsigned
CPerThread
=
1
;
constexpr
unsigned
CPerThread
=
2
;
constexpr
unsigned
HoPerThread
=
2
;
constexpr
unsigned
WoPerThread
=
2
;
...
...
src/include/gridwise_implicit_gemm_convolution_nchw_srck.cuh
View file @
ff7a6219
...
...
@@ -152,7 +152,6 @@ __global__ void gridwise_implicit_gemm_convolution_nchw_srck(InGlobalDesc,
for
(
unsigned
c_block_data_begin
=
0
;
c_block_data_begin
<
in_nchw_global_desc
.
GetLength
(
I1
);
c_block_data_begin
+=
CPerBlock
,
__syncthreads
())
{
#if 1
// input: global mem to LDS,
// convert 4d-tensor in[N,C,Hi,Wi] to matrix in_matrix[C,Hi*Wi*N]
blockwise_4d_tensor_copy_reorder_by_get_dst_from_src
<
BlockSize
>
(
...
...
@@ -165,9 +164,7 @@ __global__ void gridwise_implicit_gemm_convolution_nchw_srck(InGlobalDesc,
p_in_block
,
in_nchw_block_desc
.
GetLengths
(),
reorder_chwn_from_nchw
);
#endif
#if 1
// weight: global mem to LDS,
blockwise_4d_tensor_copy
<
BlockSize
>
(
wei_srck_global_desc
,
...
...
@@ -176,11 +173,9 @@ __global__ void gridwise_implicit_gemm_convolution_nchw_srck(InGlobalDesc,
wei_srck_block_desc
,
p_wei_block
,
wei_srck_block_desc
.
GetLengths
());
#endif
__syncthreads
();
#if 1
// a series of batched GEMM
for
(
unsigned
s
=
0
;
s
<
S
;
++
s
)
{
...
...
@@ -194,7 +189,6 @@ __global__ void gridwise_implicit_gemm_convolution_nchw_srck(InGlobalDesc,
f_accum
);
}
}
#endif
}
const
auto
matrix_c_index
=
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment