Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a3230a64
Commit
a3230a64
authored
Dec 05, 2019
by
Chao Liu
Browse files
cosmetic fix
parent
bf948337
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
32 additions
and
31 deletions
+32
-31
driver/include/device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp
...ution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp
+14
-14
driver/include/device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
...ution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
+8
-8
driver/include/device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
.../device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+10
-9
No files found.
driver/include/device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp
View file @
a3230a64
...
...
@@ -11,8 +11,8 @@ template <typename T,
typename
OutDesc
,
typename
ConvStrides
,
typename
ConvDilations
,
typename
LeftPads
,
typename
RightPads
>
typename
In
LeftPads
,
typename
In
RightPads
>
void
device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw
(
InDesc
in_nchw_desc
,
Tensor
<
T
>&
in_nchw
,
WeiDesc
wei_kcyx_desc
,
...
...
@@ -21,8 +21,8 @@ void device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw(InDesc i
const
Tensor
<
T
>&
out_nkhw
,
ConvStrides
,
ConvDilations
,
LeftPads
,
RightPads
,
In
LeftPads
,
In
RightPads
,
std
::
size_t
nrepeat
)
{
using
namespace
ck
;
...
...
@@ -62,14 +62,14 @@ void device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw(InDesc i
constexpr
index_t
GemmThreadGemmDataPerReadM
=
4
;
constexpr
index_t
GemmThreadGemmDataPerReadN
=
4
;
using
GemmABlockCopyThreadSliceLengths_GemmK_GemmM
=
Sequence
<
1
,
4
>
;
// Gemm-K, Gemm-M
using
GemmABlockCopyThreadClusterLengths_GemmK_GemmM
=
Sequence
<
8
,
32
>
;
// Gemm-K, Gemm-M
using
GemmABlockCopyThreadSliceLengths_GemmK_GemmM
=
Sequence
<
1
,
4
>
;
using
GemmABlockCopyThreadClusterLengths_GemmK_GemmM
=
Sequence
<
8
,
32
>
;
constexpr
index_t
GemmABlockCopySrcDataPerRead_GemmM
=
4
;
// Gemm-M
constexpr
index_t
GemmABlockCopyDstDataPerWrite_GemmM
=
4
;
// Gemm-M
constexpr
index_t
GemmABlockCopySrcDataPerRead_GemmM
=
4
;
constexpr
index_t
GemmABlockCopyDstDataPerWrite_GemmM
=
4
;
using
GemmBBlockCopyThreadSliceLengths_GemmK_GemmN
=
Sequence
<
4
,
1
>
;
// Gemm-K, Gemm-N
using
GemmBBlockCopyThreadClusterLengths_GemmK_GemmN
=
Sequence
<
2
,
128
>
;
// Gemm-K, Gemm-N
using
GemmBBlockCopyThreadSliceLengths_GemmK_GemmN
=
Sequence
<
4
,
1
>
;
using
GemmBBlockCopyThreadClusterLengths_GemmK_GemmN
=
Sequence
<
2
,
128
>
;
constexpr
index_t
GemmBBlockCopySrcDataPerRead_GemmN
=
1
;
constexpr
index_t
GemmBBlockCopyDstDataPerWrite_GemmN
=
1
;
...
...
@@ -80,8 +80,8 @@ void device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw(InDesc i
constexpr
index_t
GemmM
=
C
*
Y
*
X
;
constexpr
index_t
GemmN
=
N
*
Ho
*
Wo
;
constexpr
index_t
GridSize
=
((
GemmM
+
GemmMPerBlock
-
1
)
/
GemmMPerBlock
)
*
((
GemmN
+
GemmNPerBlock
-
1
)
/
GemmNPerBlock
);
constexpr
index_t
GridSize
=
math
::
integer_divide_ceil
(
GemmM
,
GemmMPerBlock
)
*
math
::
integer_divide_ceil
(
GemmN
,
GemmNPerBlock
);
printf
(
"%s: BlockSize %u, GridSize %u
\n
"
,
__func__
,
BlockSize
,
GridSize
);
...
...
@@ -95,8 +95,8 @@ void device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw(InDesc i
decltype
(
out_nkhw_desc
),
ConvStrides
,
ConvDilations
,
LeftPads
,
RightPads
,
In
LeftPads
,
In
RightPads
,
GemmMPerBlock
,
GemmNPerBlock
,
GemmKPerBlock
,
...
...
driver/include/device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
View file @
a3230a64
...
...
@@ -11,8 +11,8 @@ template <typename T,
typename
OutDesc
,
typename
ConvStrides
,
typename
ConvDilations
,
typename
LeftPads
,
typename
RightPads
>
typename
In
LeftPads
,
typename
In
RightPads
>
void
device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw
(
InDesc
in_nchw_desc
,
Tensor
<
T
>&
in_nchw
,
WeiDesc
wei_kcyx_desc
,
...
...
@@ -21,8 +21,8 @@ void device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw(InDesc i
const
Tensor
<
T
>&
out_nkhw
,
ConvStrides
,
ConvDilations
,
LeftPads
,
RightPads
,
In
LeftPads
,
In
RightPads
,
std
::
size_t
nrepeat
)
{
using
namespace
ck
;
...
...
@@ -101,8 +101,8 @@ void device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw(InDesc i
constexpr
index_t
GemmM
=
C
*
Ytilda
*
Xtilda
;
constexpr
index_t
GemmN
=
N
*
Htilda
*
Wtilda
;
constexpr
index_t
GridSize
=
((
GemmM
+
GemmMPerBlock
-
1
)
/
GemmMPerBlock
)
*
((
GemmN
+
GemmNPerBlock
-
1
)
/
GemmNPerBlock
);
constexpr
index_t
GridSize
=
math
::
integer_divide_ceil
(
GemmM
,
GemmMPerBlock
)
*
math
::
integer_divide_ceil
(
GemmN
,
GemmNPerBlock
);
printf
(
"%s: BlockSize %u, GridSize %u
\n
"
,
__func__
,
BlockSize
,
GridSize
);
...
...
@@ -116,8 +116,8 @@ void device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw(InDesc i
decltype
(
out_nkhw_desc
),
ConvStrides
,
ConvDilations
,
LeftPads
,
RightPads
,
In
LeftPads
,
In
RightPads
,
GemmMPerBlock
,
GemmNPerBlock
,
GemmKPerBlock
,
...
...
driver/include/device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
View file @
a3230a64
...
...
@@ -11,8 +11,8 @@ template <class T,
class
OutDesc
,
class
ConvStrides
,
class
ConvDilations
,
class
LeftPads
,
class
RightPads
>
class
In
LeftPads
,
class
In
RightPads
>
void
device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw
(
InDesc
,
const
Tensor
<
T
>&
in_nchw
,
WeiDesc
,
...
...
@@ -21,8 +21,8 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
Tensor
<
T
>&
out_nkhw
,
ConvStrides
,
ConvDilations
,
LeftPads
,
RightPads
,
In
LeftPads
,
In
RightPads
,
ck
::
index_t
nrepeat
)
{
using
namespace
ck
;
...
...
@@ -181,10 +181,11 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
constexpr
index_t
GemmCThreadCopyDstDataPerWrite_GemmN1
=
2
;
#endif
constexpr
index_t
B
=
N
*
Ho
*
Wo
;
constexpr
index_t
GemmM
=
K
;
constexpr
index_t
GemmN
=
N
*
Ho
*
Wo
;
constexpr
index_t
GridSize
=
((
B
+
GemmNPerBlock
-
1
)
/
GemmNPerBlock
)
*
((
K
+
GemmMPerBlock
-
1
)
/
Gemm
M
PerBlock
);
constexpr
index_t
GridSize
=
math
::
integer_divide_ceil
(
GemmM
,
GemmMPerBlock
)
*
math
::
integer_divide_ceil
(
GemmN
,
Gemm
N
PerBlock
);
printf
(
"%s: BlockSize %u, GridSize %u
\n
"
,
__func__
,
BlockSize
,
GridSize
);
...
...
@@ -198,8 +199,8 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
decltype
(
out_nkhw_desc
),
ConvStrides
,
ConvDilations
,
LeftPads
,
RightPads
,
In
LeftPads
,
In
RightPads
,
GemmMPerBlock
,
GemmNPerBlock
,
GemmKPerBlock
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment