gaoqiong / composable_kernel / Commits / 3549e344

Commit 3549e344, authored Jul 25, 2022 by Chao Liu

    adding group

Parent: c0bfcf91

Showing 7 changed files with 878 additions and 298 deletions (+878 −298):
  example/09_convnd_fwd/convnd_fwd_common.hpp                                          +11  −65
  example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp                                        +40  −21
  include/ck/tensor_operation/gpu/device/device_conv_fwd_multiple_d_xdl_cshuffle.hpp  +410 −119
  include/ck/tensor_operation/gpu/device/tensor_layout.hpp                            +213  −27
  library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp      +1   −6
  library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp    +190  −60
  library/include/ck/library/utility/host_tensor.hpp                                   +13   −0
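The subject "adding group" refers to grouped convolution: every tensor in the files below gains a G (group) dimension, either leading (GNHWC), interleaved (NHWGC), or explicitly strided (G_NHW_C). For orientation only, here is a minimal host-side sketch of the grouped forward convolution these kernels compute. It is not code from this commit: the names, the flat GNCHW-order indexing, and the single stride/dilation/pad scalar are all illustrative assumptions. C and K are per-group channel counts; no data crosses group boundaries.

    #include <cstddef>
    #include <vector>

    // Hypothetical shapes for a grouped 2-D forward convolution reference.
    struct ConvShapes { int G, N, K, C, Hi, Wi, Y, X, Ho, Wo, stride, dilation, pad; };

    void grouped_conv2d_ref(const ConvShapes& s,
                            const std::vector<float>& in,   // [G, N, C, Hi, Wi]
                            const std::vector<float>& wei,  // [G, K, C, Y, X]
                            std::vector<float>& out)        // [G, N, K, Ho, Wo]
    {
        auto in_at = [&](int g, int n, int c, int hi, int wi) {
            return in[(((std::size_t(g) * s.N + n) * s.C + c) * s.Hi + hi) * s.Wi + wi];
        };
        auto wei_at = [&](int g, int k, int c, int y, int x) {
            return wei[(((std::size_t(g) * s.K + k) * s.C + c) * s.Y + y) * s.X + x];
        };

        for(int g = 0; g < s.G; ++g)
        for(int n = 0; n < s.N; ++n)
        for(int k = 0; k < s.K; ++k)
        for(int ho = 0; ho < s.Ho; ++ho)
        for(int wo = 0; wo < s.Wo; ++wo)
        {
            float acc = 0.f;
            for(int c = 0; c < s.C; ++c)
            for(int y = 0; y < s.Y; ++y)
            for(int x = 0; x < s.X; ++x)
            {
                // sliding-window input coordinates; out-of-range means padding (zero)
                const int hi = ho * s.stride + y * s.dilation - s.pad;
                const int wi = wo * s.stride + x * s.dilation - s.pad;
                if(hi >= 0 && hi < s.Hi && wi >= 0 && wi < s.Wi)
                    acc += in_at(g, n, c, hi, wi) * wei_at(g, k, c, y, x);
            }
            out[(((std::size_t(g) * s.N + n) * s.K + k) * s.Ho + ho) * s.Wo + wo] = acc;
        }
    }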
example/09_convnd_fwd/convnd_fwd_common.hpp

@@ -112,50 +112,14 @@ int run_conv_fwd(bool do_verification,
                 const WeiElementOp& wei_element_op,
                 const OutElementOp& out_element_op)
{
-#if 0
-    const auto in_g_n_c_wis_desc  = ck::utils::conv::get_input_host_tensor_descriptor<InLayout>(conv_param);
-    const auto wei_g_k_c_xs_desc  = ck::utils::conv::get_weight_host_tensor_descriptor<WeiLayout>(conv_param);
-    const auto out_g_n_k_wos_desc = ck::utils::conv::get_output_host_tensor_descriptor<OutLayout>(conv_param);
-#else
-    const auto in_g_n_wis_c_desc = HostTensorDescriptor(
-        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-                                 static_cast<std::size_t>(conv_param.N_),
-                                 static_cast<std::size_t>(conv_param.input_spatial_lengths_[0]),
-                                 static_cast<std::size_t>(conv_param.input_spatial_lengths_[1]),
-                                 static_cast<std::size_t>(conv_param.C_)});
-
-    const auto wei_g_k_xs_c_desc = HostTensorDescriptor(
-        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-                                 static_cast<std::size_t>(conv_param.K_),
-                                 static_cast<std::size_t>(conv_param.filter_spatial_lengths_[0]),
-                                 static_cast<std::size_t>(conv_param.filter_spatial_lengths_[1]),
-                                 static_cast<std::size_t>(conv_param.C_)});
-
-    const auto bias_g_n_wos_k_desc = HostTensorDescriptor(
-        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-                                 static_cast<std::size_t>(conv_param.N_),
-                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[0]),
-                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[1]),
-                                 static_cast<std::size_t>(conv_param.K_)},
-        std::vector<std::size_t>{0, 0, 0, 0, 1});
-
-    const auto out_g_n_wos_k_desc = HostTensorDescriptor(
-        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-                                 static_cast<std::size_t>(conv_param.N_),
-                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[0]),
-                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[1]),
-                                 static_cast<std::size_t>(conv_param.K_)});
-
-    // tensor descriptor in NCHW/KXYC/NKHW dimensional order
-    const auto in_g_n_c_wis_desc = transpose_host_tensor_descriptor_given_new2old(
-        in_g_n_wis_c_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-    const auto wei_g_k_c_xs_desc = transpose_host_tensor_descriptor_given_new2old(
-        wei_g_k_xs_c_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-    const auto bias_g_n_k_wos_desc = transpose_host_tensor_descriptor_given_new2old(
-        bias_g_n_wos_k_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-    const auto out_g_n_k_wos_desc = transpose_host_tensor_descriptor_given_new2old(
-        out_g_n_wos_k_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-#endif
    const auto in_g_n_c_wis_desc =
        ck::utils::conv::make_input_host_tensor_descriptor_packed<InLayout>(conv_param);

    const auto wei_g_k_c_xs_desc =
        ck::utils::conv::make_weight_host_tensor_descriptor_packed<WeiLayout>(conv_param);

    const auto bias_g_n_k_wos_desc =
        ck::utils::conv::make_output_host_tensor_descriptor_packed<OutLayout>(conv_param);

    const auto out_g_n_k_wos_desc =
        ck::utils::conv::make_output_host_tensor_descriptor_packed<OutLayout>(conv_param);

    Tensor<InDataType>  in(in_g_n_c_wis_desc);
    Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
@@ -267,9 +231,6 @@ int run_conv_fwd(bool do_verification,
    Tensor<OutDataType> c_host(out_g_n_k_wos_desc);

    auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                 InLayout,
                                                                 WeiLayout,
                                                                 OutLayout,
                                                                 InDataType,
                                                                 WeiDataType,
                                                                 OutDataType,
@@ -291,24 +252,9 @@ int run_conv_fwd(bool do_verification,
    ref_invoker.Run(ref_argument);

-    for(int g = 0; g < out_host.mDesc.GetLengths()[0]; g++)
-    {
-        for(int n = 0; n < out_host.mDesc.GetLengths()[1]; n++)
-        {
-            for(int k = 0; k < out_host.mDesc.GetLengths()[2]; k++)
-            {
-                for(int ho = 0; ho < out_host.mDesc.GetLengths()[3]; ho++)
-                {
-                    for(int wo = 0; wo < out_host.mDesc.GetLengths()[4]; wo++)
-                    {
-                        out_element_op(out_host(g, n, k, ho, wo),
-                                       c_host(g, n, k, ho, wo),
-                                       bias(g, n, k, ho, wo));
-                    }
-                }
-            }
-        }
-    }
    // TODO: implement elementwise operation for host
    out_host.ForEach(
        [&](auto&, auto idx) { out_element_op(out_host(idx), c_host(idx), bias(idx)); });

    out_device_buf.FromDevice(out_device.mData.data());
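The hunk above swaps a hard-coded five-deep g/n/k/ho/wo loop for a rank-agnostic Tensor::ForEach traversal. As a hedged sketch of what such a traversal amounts to (the odometer-style helper below is hypothetical and assumes all lengths are nonzero, it is not CK's implementation):

    #include <cstddef>
    #include <vector>

    // Visit every multi-dimensional index of a tensor given only its lengths.
    template <typename F>
    void for_each_index(const std::vector<std::size_t>& lengths, F f)
    {
        if(lengths.empty())
            return;

        std::vector<std::size_t> idx(lengths.size(), 0);

        while(true)
        {
            f(idx); // visit one index tuple, e.g. (g, n, k, ho, wo)

            // advance the fastest-varying (last) dimension first, carrying left
            std::size_t d = lengths.size();
            while(true)
            {
                --d;
                if(++idx[d] < lengths[d])
                    break; // no carry; keep visiting
                idx[d] = 0;
                if(d == 0)
                    return; // carried past the slowest dimension: done
            }
        }
    }

In the diff, out_host.ForEach applies out_element_op(out_host(idx), c_host(idx), bias(idx)) at every such index, which is exactly what the deleted nested loops spelled out by hand for the 5-D case.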
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp

@@ -71,22 +71,41 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template <ck::index_t NDimSpatial>
using DeviceConvNDFwdInstance = ck::tensor_operation::device::DeviceConvFwdMultipleD_Xdl_CShuffle<
    NDimSpatial,
-    ck::tuple_element_t<NDimSpatial - 1,
-                        ck::Tuple<ck::tensor_layout::convolution::NWC,
-                                  ck::tensor_layout::convolution::NHWC,
-                                  ck::tensor_layout::convolution::NDHWC>>,
-    ck::tuple_element_t<NDimSpatial - 1,
-                        ck::Tuple<ck::tensor_layout::convolution::KXC,
-                                  ck::tensor_layout::convolution::KYXC,
-                                  ck::tensor_layout::convolution::KZYXC>>,
-    ck::Tuple<ck::tuple_element_t<NDimSpatial - 1,
-                                  ck::Tuple<ck::tensor_layout::convolution::NW_K,
-                                            ck::tensor_layout::convolution::NHW_K,
-                                            ck::tensor_layout::convolution::NDHW_K>>>,
-    ck::tuple_element_t<NDimSpatial - 1,
-                        ck::Tuple<ck::tensor_layout::convolution::NWK,
-                                  ck::tensor_layout::convolution::NHWK,
-                                  ck::tensor_layout::convolution::NDHWK>>,
#if 0
    ck::tuple_element_t<NDimSpatial - 1,
                        ck::Tuple<ck::tensor_layout::convolution::G_NW_C,
                                  ck::tensor_layout::convolution::G_NHW_C,
                                  ck::tensor_layout::convolution::G_NDHW_C>>,
    ck::tuple_element_t<NDimSpatial - 1,
                        ck::Tuple<ck::tensor_layout::convolution::G_K_X_C,
                                  ck::tensor_layout::convolution::G_K_YX_C,
                                  ck::tensor_layout::convolution::G_K_ZYX_C>>,
    ck::Tuple<ck::tuple_element_t<NDimSpatial - 1,
                                  ck::Tuple<ck::tensor_layout::convolution::G_NW_K,
                                            ck::tensor_layout::convolution::G_NHW_K,
                                            ck::tensor_layout::convolution::G_NDHW_K>>>,
    ck::tuple_element_t<NDimSpatial - 1,
                        ck::Tuple<ck::tensor_layout::convolution::G_NW_K,
                                  ck::tensor_layout::convolution::G_NHW_K,
                                  ck::tensor_layout::convolution::G_NDHW_K>>,
#else
    ck::tuple_element_t<NDimSpatial - 1,
                        ck::Tuple<ck::tensor_layout::convolution::NWGC,
                                  ck::tensor_layout::convolution::NHWGC,
                                  ck::tensor_layout::convolution::NDHWGC>>,
    ck::tuple_element_t<NDimSpatial - 1,
                        ck::Tuple<ck::tensor_layout::convolution::KXGC,
                                  ck::tensor_layout::convolution::KYXGC,
                                  ck::tensor_layout::convolution::KZYXGC>>,
    ck::Tuple<ck::tuple_element_t<NDimSpatial - 1,
                                  ck::Tuple<ck::tensor_layout::convolution::NWGK,
                                            ck::tensor_layout::convolution::NHWGK,
                                            ck::tensor_layout::convolution::NDHWGK>>>,
    ck::tuple_element_t<NDimSpatial - 1,
                        ck::Tuple<ck::tensor_layout::convolution::NWGK,
                                  ck::tensor_layout::convolution::NHWGK,
                                  ck::tensor_layout::convolution::NDHWGK>>,
#endif
    InDataType,
    WeiDataType,
    AccDataType,
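The instance above selects each layout type by spatial rank with ck::tuple_element_t. The same idiom with the standard library, as an illustrative sketch (the empty layout structs are stand-ins, not the CK definitions):

    #include <tuple>
    #include <type_traits>

    // Stand-in layout tags for illustration only.
    struct NWGC {};
    struct NHWGC {};
    struct NDHWGC {};

    // The spatial rank picks the layout type at compile time.
    template <int NDimSpatial>
    using InLayoutFor =
        std::tuple_element_t<NDimSpatial - 1, std::tuple<NWGC, NHWGC, NDHWGC>>;

    static_assert(std::is_same_v<InLayoutFor<2>, NHWGC>, "2-D spatial selects NHWGC");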
@@ -167,9 +186,9 @@ int main(int argc, char* argv[])
    if(num_dim_spatial == 1)
    {
        return run_conv_fwd<1,
-                           ck::tensor_layout::convolution::NWC,
-                           ck::tensor_layout::convolution::KXC,
-                           ck::tensor_layout::convolution::NWK,
                            ck::tensor_layout::convolution::NWGC,
                            ck::tensor_layout::convolution::KXGC,
                            ck::tensor_layout::convolution::NWGK,
                            InDataType,
                            WeiDataType,
                            OutDataType,
@@ -187,9 +206,9 @@ int main(int argc, char* argv[])
    else if(num_dim_spatial == 2)
    {
        return run_conv_fwd<2,
-                           ck::tensor_layout::convolution::NHWC,
-                           ck::tensor_layout::convolution::KYXC,
-                           ck::tensor_layout::convolution::NHWK,
                            ck::tensor_layout::convolution::NHWGC,
                            ck::tensor_layout::convolution::KYXGC,
                            ck::tensor_layout::convolution::NHWGK,
                            InDataType,
                            WeiDataType,
                            OutDataType,
@@ -207,9 +226,9 @@ int main(int argc, char* argv[])
    else if(num_dim_spatial == 3)
    {
        return run_conv_fwd<3,
-                           ck::tensor_layout::convolution::NDHWC,
-                           ck::tensor_layout::convolution::KZYXC,
-                           ck::tensor_layout::convolution::NDHWK,
                            ck::tensor_layout::convolution::NDHWGC,
                            ck::tensor_layout::convolution::KZYXGC,
                            ck::tensor_layout::convolution::NDHWGK,
                            InDataType,
                            WeiDataType,
                            OutDataType,
include/ck/tensor_operation/gpu/device/device_conv_fwd_multiple_d_xdl_cshuffle.hpp

@@ -216,9 +216,9 @@ __global__ void
//
// Supports:
// @li Forward convolution with up to 3 spatial dimensions
-// @li Input tensor in NWC data format
-// @li Weight tensor in KXC data format
-// @li Output tensor in NWK data format
// @li Input tensor in GNWC data format
// @li Weight tensor in GKXC data format
// @li Output tensor in GNWK data format
//
// 1D:
// out[N, Wo, K] = in[N, Wi, C] * wei[K, X, C]
@@ -302,7 +302,7 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    template <typename ALay,
              typename std::enable_if<NDimSpatial == 1 &&
-                                         is_same_v<ALay, tensor_layout::convolution::NWC>,
                                          is_same_v<ALay, tensor_layout::convolution::GNWC>,
                                      bool>::type = false>
    static auto MakeAGridDescriptor_M_K(
        const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
@@ -319,16 +319,6 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t N = a_g_n_c_wis_lengths[1];
        const index_t C = a_g_n_c_wis_lengths[2];

-        const index_t GemmMRaw =
-            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
-                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const index_t GemmKRaw =
-            C * std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
-                                b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());

        const index_t Wi = a_g_n_c_wis_lengths[3];
        const index_t Wo = e_g_n_k_wos_lengths[3];
@@ -338,8 +328,13 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        if constexpr(ConvForwardSpecialization ==
                     ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
        {
            const index_t NWo =
                N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                    e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                    index_t{1},
                                    std::multiplies<index_t>());

            const auto in_gemmmraw_gemmk_grid_desc =
-                make_naive_tensor_descriptor_packed(make_tuple(GemmMRaw, GemmKRaw));
                make_naive_tensor_descriptor_packed(make_tuple(NWo, C));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmk_grid_desc);
@@ -414,7 +409,7 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    template <typename ALay,
              typename std::enable_if<NDimSpatial == 2 &&
-                                         is_same_v<ALay, tensor_layout::convolution::NHWC>,
                                          is_same_v<ALay, tensor_layout::convolution::GNHWC>,
                                      bool>::type = false>
    static auto MakeAGridDescriptor_M_K(
        const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
@@ -431,16 +426,6 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t N = a_g_n_c_wis_lengths[1];
        const index_t C = a_g_n_c_wis_lengths[2];

-        const index_t GemmMRaw =
-            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
-                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const index_t GemmKRaw =
-            C * std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
-                                b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());

        const index_t Hi = a_g_n_c_wis_lengths[3];
        const index_t Wi = a_g_n_c_wis_lengths[4];
@@ -453,8 +438,13 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        if constexpr(ConvForwardSpecialization ==
                     ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
        {
            const index_t NHoWo =
                N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                    e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                    index_t{1},
                                    std::multiplies<index_t>());

            const auto in_gemmmraw_gemmkraw_grid_desc =
-                make_naive_tensor_descriptor_packed(make_tuple(GemmMRaw, GemmKRaw));
                make_naive_tensor_descriptor_packed(make_tuple(NHoWo, C));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);
@@ -539,8 +529,8 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    }

    template <typename ALay,
-             typename std::enable_if<NDimSpatial == 2 &&
-                                         is_same_v<ALay, tensor_layout::convolution::G_NHW_C>,
              typename std::enable_if<NDimSpatial == 3 &&
                                          is_same_v<ALay, tensor_layout::convolution::GNDHWC>,
                                      bool>::type = false>
    static auto MakeAGridDescriptor_M_K(
        const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
@@ -557,16 +547,272 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t N = a_g_n_c_wis_lengths[1];
        const index_t C = a_g_n_c_wis_lengths[2];

-        const index_t GemmMRaw =
-            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
-                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const index_t GemmKRaw =
-            C * std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
-                                b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());

        const index_t Di = a_g_n_c_wis_lengths[3];
        const index_t Hi = a_g_n_c_wis_lengths[4];
        const index_t Wi = a_g_n_c_wis_lengths[5];

        const index_t Do = e_g_n_k_wos_lengths[3];
        const index_t Ho = e_g_n_k_wos_lengths[4];
        const index_t Wo = e_g_n_k_wos_lengths[5];

        const index_t ConvStrideD = conv_filter_strides[0];
        const index_t ConvStrideH = conv_filter_strides[1];
        const index_t ConvStrideW = conv_filter_strides[2];

        if constexpr(ConvForwardSpecialization ==
                     ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
        {
            const index_t NDoHoWo =
                N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                    e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                    index_t{1},
                                    std::multiplies<index_t>());

            const auto in_gemmmraw_gemmkraw_grid_desc =
                make_naive_tensor_descriptor_packed(make_tuple(NDoHoWo, C));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);

            return in_gemmm_gemmk_grid_desc;
        }
        else if constexpr(ConvForwardSpecialization ==
                          ConvolutionForwardSpecialization::Filter1x1Pad0)
        {
            const auto in_n_di_hi_wi_c_grid_desc =
                make_naive_tensor_descriptor_packed(make_tuple(N, Di, Hi, Wi, C));

            const auto in_n_do_ho_wo_c_grid_desc = transform_tensor_descriptor(
                in_n_di_hi_wi_c_grid_desc,
                make_tuple(make_pass_through_transform(N),
                           make_embed_transform(make_tuple(Do), make_tuple(ConvStrideD)),
                           make_embed_transform(make_tuple(Ho), make_tuple(ConvStrideH)),
                           make_embed_transform(make_tuple(Wo), make_tuple(ConvStrideW)),
                           make_pass_through_transform(C)),
                make_tuple(
                    Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}),
                make_tuple(
                    Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}));

            const auto in_gemmmraw_gemmkraw_grid_desc = transform_tensor_descriptor(
                in_n_do_ho_wo_c_grid_desc,
                make_tuple(make_merge_transform(make_tuple(N, Do, Ho, Wo)),
                           make_pass_through_transform(C)),
                make_tuple(Sequence<0, 1, 2, 3>{}, Sequence<4>{}),
                make_tuple(Sequence<0>{}, Sequence<1>{}));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);

            return in_gemmm_gemmk_grid_desc;
        }
        else
        {
            const index_t Z = b_g_k_c_xs_lengths[3];
            const index_t Y = b_g_k_c_xs_lengths[4];
            const index_t X = b_g_k_c_xs_lengths[5];

            const index_t ConvDilationD = conv_filter_dilations[0];
            const index_t ConvDilationH = conv_filter_dilations[1];
            const index_t ConvDilationW = conv_filter_dilations[2];

            const index_t InLeftPadD = input_left_pads[0];
            const index_t InLeftPadH = input_left_pads[1];
            const index_t InLeftPadW = input_left_pads[2];

            const index_t InRightPadD = input_right_pads[0];
            const index_t InRightPadH = input_right_pads[1];
            const index_t InRightPadW = input_right_pads[2];

            const auto in_n_di_hi_wi_c_grid_desc =
                make_naive_tensor_descriptor_packed(make_tuple(N, Di, Hi, Wi, C));

            const auto in_n_hip_wip_c_grid_desc = transform_tensor_descriptor(
                in_n_di_hi_wi_c_grid_desc,
                make_tuple(make_pass_through_transform(N),
                           make_pad_transform(Di, InLeftPadD, InRightPadD),
                           make_pad_transform(Hi, InLeftPadH, InRightPadH),
                           make_pad_transform(Wi, InLeftPadW, InRightPadW),
                           make_pass_through_transform(C)),
                make_tuple(
                    Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}),
                make_tuple(
                    Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}));

            const auto in_n_z_do_y_ho_x_wo_c_grid_desc = transform_tensor_descriptor(
                in_n_hip_wip_c_grid_desc,
                make_tuple(
                    make_pass_through_transform(N),
                    make_embed_transform(make_tuple(Z, Do), make_tuple(ConvDilationD, ConvStrideD)),
                    make_embed_transform(make_tuple(Y, Ho), make_tuple(ConvDilationH, ConvStrideH)),
                    make_embed_transform(make_tuple(X, Wo), make_tuple(ConvDilationW, ConvStrideW)),
                    make_pass_through_transform(C)),
                make_tuple(
                    Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}),
                make_tuple(
                    Sequence<0>{}, Sequence<1, 2>{}, Sequence<3, 4>{}, Sequence<5, 6>{}, Sequence<7>{}));

            const auto in_gemmmraw_gemmkraw_grid_desc = transform_tensor_descriptor(
                in_n_z_do_y_ho_x_wo_c_grid_desc,
                make_tuple(make_merge_transform(make_tuple(N, Do, Ho, Wo)),
                           make_merge_transform(make_tuple(Z, Y, X, C))),
                make_tuple(Sequence<0, 2, 4, 6>{}, Sequence<1, 3, 5, 7>{}),
                make_tuple(Sequence<0>{}, Sequence<1>{}));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);

            return in_gemmm_gemmk_grid_desc;
        }
    }
    // TODO: implement ck::tensor_layout::convolution that describe packed/strided dimension as
    // properties
    template <typename ALay,
              typename std::enable_if<NDimSpatial == 1 &&
                                          (is_same_v<ALay, tensor_layout::convolution::G_NW_C> ||
                                           is_same_v<ALay, tensor_layout::convolution::NWGC>),
                                      bool>::type = false>
    static auto
    MakeAGridDescriptor_M_K(const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
                            const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_strides,
                            const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths,
                            const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_strides,
                            const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_lengths,
                            const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_strides,
                            const std::array<index_t, NDimSpatial>& conv_filter_strides,
                            const std::array<index_t, NDimSpatial>& conv_filter_dilations,
                            const std::array<index_t, NDimSpatial>& input_left_pads,
                            const std::array<index_t, NDimSpatial>& input_right_pads)
    {
        const index_t N = a_g_n_c_wis_lengths[1];
        const index_t C = a_g_n_c_wis_lengths[2];

        const index_t Wi = a_g_n_c_wis_lengths[3];
        const index_t Wo = e_g_n_k_wos_lengths[3];

        const index_t ConvStrideW = conv_filter_strides[0];

        if constexpr(ConvForwardSpecialization ==
                     ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
        {
            const index_t NHoWo =
                N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                    e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                    index_t{1},
                                    std::multiplies<index_t>());

            // This is different
            const index_t WiStride = a_g_n_c_wis_strides[2 + NDimSpatial];
            const auto CStride     = I1;

            const auto in_gemmmraw_gemmk_grid_desc =
                make_naive_tensor_descriptor(make_tuple(NHoWo, C), make_tuple(WiStride, CStride));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmk_grid_desc);

            return in_gemmm_gemmk_grid_desc;
        }
        else if constexpr(ConvForwardSpecialization ==
                          ConvolutionForwardSpecialization::Filter1x1Pad0)
        {
            // This is different
            const index_t NStride  = a_g_n_c_wis_strides[1];
            const index_t WiStride = a_g_n_c_wis_strides[3];
            const auto CStride     = I1;

            const auto in_n_wi_c_grid_desc = make_naive_tensor_descriptor(
                make_tuple(N, Wi, C), make_tuple(NStride, WiStride, CStride));

            const auto in_n_wo_c_grid_desc = transform_tensor_descriptor(
                in_n_wi_c_grid_desc,
                make_tuple(make_pass_through_transform(N),
                           make_embed_transform(make_tuple(Wo), make_tuple(ConvStrideW)),
                           make_pass_through_transform(C)),
                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}),
                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}));

            const auto in_gemmmraw_gemmkraw_grid_desc = transform_tensor_descriptor(
                in_n_wo_c_grid_desc,
                make_tuple(make_merge_transform(make_tuple(N, Wo)),
                           make_pass_through_transform(C)),
                make_tuple(Sequence<0, 1>{}, Sequence<2>{}),
                make_tuple(Sequence<0>{}, Sequence<1>{}));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);

            return in_gemmm_gemmk_grid_desc;
        }
        else
        {
            const index_t X = b_g_k_c_xs_lengths[3];

            const index_t ConvDilationW = conv_filter_dilations[0];

            const index_t InLeftPadW  = input_left_pads[0];
            const index_t InRightPadW = input_right_pads[0];

            // This is different
            const index_t NStride  = a_g_n_c_wis_strides[1];
            const index_t WiStride = a_g_n_c_wis_strides[3];
            const auto CStride     = I1;

            const auto in_n_wi_c_grid_desc = make_naive_tensor_descriptor(
                make_tuple(N, Wi, C), make_tuple(NStride, WiStride, CStride));

            const auto in_n_wip_c_grid_desc = transform_tensor_descriptor(
                in_n_wi_c_grid_desc,
                make_tuple(make_pass_through_transform(N),
                           make_pad_transform(Wi, InLeftPadW, InRightPadW),
                           make_pass_through_transform(C)),
                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}),
                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}));

            const auto in_n_x_wo_c_grid_desc = transform_tensor_descriptor(
                in_n_wip_c_grid_desc,
                make_tuple(make_pass_through_transform(N),
                           make_embed_transform(make_tuple(X, Wo),
                                                make_tuple(ConvDilationW, ConvStrideW)),
                           make_pass_through_transform(C)),
                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}),
                make_tuple(Sequence<0>{}, Sequence<1, 2>{}, Sequence<3>{}));

            const auto in_gemmmraw_gemmk_grid_desc = transform_tensor_descriptor(
                in_n_x_wo_c_grid_desc,
                make_tuple(make_merge_transform(make_tuple(N, Wo)),
                           make_merge_transform(make_tuple(X, C))),
                make_tuple(Sequence<0, 2>{}, Sequence<1, 3>{}),
                make_tuple(Sequence<0>{}, Sequence<1>{}));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmk_grid_desc);

            return in_gemmm_gemmk_grid_desc;
        }
    }
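The pad/embed/merge transform chain in the overload above is the implicit-GEMM (im2col) index map: GEMM row m enumerates (n, wo) and GEMM column k enumerates (x, c). As a hedged sketch of the scalar arithmetic the embed and pad transforms encode for the 1-D generic path (the free function and its name are illustrative, not CK API):

    // The input W coordinate that GEMM coordinate pair (wo, x) resolves to:
    // embed places it in the left/right-padded view, pad shifts it back by the
    // left padding (a negative or too-large result lands in the zero-pad zone).
    inline int input_w_coordinate(
        int wo, int x, int conv_stride_w, int conv_dilation_w, int in_left_pad_w)
    {
        return wo * conv_stride_w + x * conv_dilation_w - in_left_pad_w;
    }

The full element offset is then n * NStride + wi * WiStride + c * CStride, which is why only the three strides need to change between the packed and strided overloads.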
    template <typename ALay,
              typename std::enable_if<NDimSpatial == 2 &&
                                          (is_same_v<ALay, tensor_layout::convolution::G_NHW_C> ||
                                           is_same_v<ALay, tensor_layout::convolution::NHWGC>),
                                      bool>::type = false>
    static auto
    MakeAGridDescriptor_M_K(const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
                            const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_strides,
                            const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths,
                            const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_strides,
                            const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_lengths,
                            const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_strides,
                            const std::array<index_t, NDimSpatial>& conv_filter_strides,
                            const std::array<index_t, NDimSpatial>& conv_filter_dilations,
                            const std::array<index_t, NDimSpatial>& input_left_pads,
                            const std::array<index_t, NDimSpatial>& input_right_pads)
    {
        const index_t N = a_g_n_c_wis_lengths[1];
        const index_t C = a_g_n_c_wis_lengths[2];

        const index_t Hi = a_g_n_c_wis_lengths[3];
        const index_t Wi = a_g_n_c_wis_lengths[4];
@@ -579,12 +825,17 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        if constexpr(ConvForwardSpecialization ==
                     ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
        {
            const index_t NHoWo =
                N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                    e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                    index_t{1},
                                    std::multiplies<index_t>());

            // This is different
-            const index_t CStride = a_g_n_c_wis_strides[2];
-            const index_t WStride = a_g_n_c_wis_strides[2 + NDimSpatial];
            const index_t WiStride = a_g_n_c_wis_strides[2 + NDimSpatial];
            const auto CStride     = I1;

            const auto in_gemmmraw_gemmkraw_grid_desc =
-                make_naive_tensor_descriptor(make_tuple(GemmMRaw, GemmKRaw),
-                                             make_tuple(WStride, CStride));
                make_naive_tensor_descriptor(make_tuple(NHoWo, C),
                                             make_tuple(WiStride, CStride));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);
@@ -595,12 +846,13 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
                          ConvolutionForwardSpecialization::Filter1x1Pad0)
        {
            // This is different
-            const auto in_n_hi_wi_c_grid_desc =
-                make_naive_tensor_descriptor(make_tuple(N, Hi, Wi, C),
-                                             make_tuple(a_g_n_c_wis_srides[1],
-                                                        a_g_n_c_wis_srides[3],
-                                                        a_g_n_c_wis_srides[4],
-                                                        a_g_n_c_wis_srides[2]));
            const index_t NStride  = a_g_n_c_wis_strides[1];
            const index_t HiStride = a_g_n_c_wis_strides[3];
            const index_t WiStride = a_g_n_c_wis_strides[4];
            const auto CStride     = I1;

            const auto in_n_hi_wi_c_grid_desc = make_naive_tensor_descriptor(
                make_tuple(N, Hi, Wi, C), make_tuple(NStride, HiStride, WiStride, CStride));

            const auto in_n_ho_wo_c_grid_desc = transform_tensor_descriptor(
                in_n_hi_wi_c_grid_desc,
@@ -638,12 +890,13 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
            const index_t InRightPadW = input_right_pads[1];

            // This is different
-            const auto in_n_hi_wi_c_grid_desc =
-                make_naive_tensor_descriptor(make_tuple(N, Hi, Wi, C),
-                                             make_tuple(a_g_n_c_wis_srides[1],
-                                                        a_g_n_c_wis_srides[3],
-                                                        a_g_n_c_wis_srides[4],
-                                                        a_g_n_c_wis_srides[2]));
            const index_t NStride  = a_g_n_c_wis_strides[1];
            const index_t HiStride = a_g_n_c_wis_strides[3];
            const index_t WiStride = a_g_n_c_wis_strides[4];
            const auto CStride     = I1;

            const auto in_n_hi_wi_c_grid_desc = make_naive_tensor_descriptor(
                make_tuple(N, Hi, Wi, C), make_tuple(NStride, HiStride, WiStride, CStride));

            const auto in_n_hip_wip_c_grid_desc = transform_tensor_descriptor(
                in_n_hi_wi_c_grid_desc,
@@ -680,7 +933,8 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    template <typename ALay,
              typename std::enable_if<NDimSpatial == 3 &&
-                                         is_same_v<ALay, tensor_layout::convolution::NDHWC>,
                                          (is_same_v<ALay, tensor_layout::convolution::G_NDHW_C> ||
                                           is_same_v<ALay, tensor_layout::convolution::NDHWGC>),
                                      bool>::type = false>
    static auto MakeAGridDescriptor_M_K(
        const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
@@ -697,16 +951,6 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t N = a_g_n_c_wis_lengths[1];
        const index_t C = a_g_n_c_wis_lengths[2];

-        const index_t GemmMRaw =
-            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
-                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const index_t GemmKRaw =
-            C * std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
-                                b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());

        const index_t Di = a_g_n_c_wis_lengths[3];
        const index_t Hi = a_g_n_c_wis_lengths[4];
        const index_t Wi = a_g_n_c_wis_lengths[5];
@@ -722,8 +966,18 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        if constexpr(ConvForwardSpecialization ==
                     ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
        {
            const index_t NDoHoWo =
                N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                    e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                    index_t{1},
                                    std::multiplies<index_t>());

            // This is different
            const index_t WiStride = a_g_n_c_wis_strides[2 + NDimSpatial];
            const auto CStride     = I1;

            const auto in_gemmmraw_gemmkraw_grid_desc =
-                make_naive_tensor_descriptor_packed(make_tuple(GemmMRaw, GemmKRaw));
                make_naive_tensor_descriptor(make_tuple(NDoHoWo, C),
                                             make_tuple(WiStride, CStride));

            const auto in_gemmm_gemmk_grid_desc =
                matrix_padder.PadADescriptor_M_K(in_gemmmraw_gemmkraw_grid_desc);
@@ -733,8 +987,16 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        else if constexpr(ConvForwardSpecialization ==
                          ConvolutionForwardSpecialization::Filter1x1Pad0)
        {
-            const auto in_n_di_hi_wi_c_grid_desc =
-                make_naive_tensor_descriptor_packed(make_tuple(N, Di, Hi, Wi, C));
            // This is different
            const index_t NStride  = a_g_n_c_wis_strides[1];
            const index_t DiStride = a_g_n_c_wis_strides[3];
            const index_t HiStride = a_g_n_c_wis_strides[4];
            const index_t WiStride = a_g_n_c_wis_strides[5];
            const auto CStride     = I1;

            const auto in_n_di_hi_wi_c_grid_desc = make_naive_tensor_descriptor(
                make_tuple(N, Di, Hi, Wi, C),
                make_tuple(NStride, DiStride, HiStride, WiStride, CStride));

            const auto in_n_do_ho_wo_c_grid_desc = transform_tensor_descriptor(
                in_n_di_hi_wi_c_grid_desc,
@@ -778,8 +1040,16 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
            const index_t InRightPadH = input_right_pads[1];
            const index_t InRightPadW = input_right_pads[2];

-            const auto in_n_di_hi_wi_c_grid_desc =
-                make_naive_tensor_descriptor_packed(make_tuple(N, Di, Hi, Wi, C));
            // This is different
            const index_t NStride  = a_g_n_c_wis_strides[1];
            const index_t DiStride = a_g_n_c_wis_strides[3];
            const index_t HiStride = a_g_n_c_wis_strides[4];
            const index_t WiStride = a_g_n_c_wis_strides[5];
            const auto CStride     = I1;

            const auto in_n_di_hi_wi_c_grid_desc = make_naive_tensor_descriptor(
                make_tuple(N, Di, Hi, Wi, C),
                make_tuple(NStride, DiStride, HiStride, WiStride, CStride));

            const auto in_n_hip_wip_c_grid_desc = transform_tensor_descriptor(
                in_n_di_hi_wi_c_grid_desc,
@@ -824,9 +1094,9 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    }

    template <typename BLay,
-             typename std::enable_if<is_same_v<BLay, tensor_layout::convolution::KXC> ||
-                                         is_same_v<BLay, tensor_layout::convolution::KYXC> ||
-                                         is_same_v<BLay, tensor_layout::convolution::KZYXC>,
              typename std::enable_if<is_same_v<BLay, tensor_layout::convolution::GKXC> ||
                                          is_same_v<BLay, tensor_layout::convolution::GKYXC> ||
                                          is_same_v<BLay, tensor_layout::convolution::GKZYXC>,
                                      bool>::type = false>
    static auto
    MakeBGridDescriptor_N_K(const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths,
@@ -835,15 +1105,12 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t K = b_g_k_c_xs_lengths[1];
        const index_t C = b_g_k_c_xs_lengths[2];

-        const index_t GemmNRaw = K;
-
-        const index_t GemmKRaw =
-            C * std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
-                                b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const auto wei_k_yxc_grid_desc =
-            make_naive_tensor_descriptor_packed(make_tuple(GemmNRaw, GemmKRaw));
        const index_t YX = std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
                                           b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
                                           index_t{1},
                                           std::multiplies<index_t>());

        const auto wei_k_yxc_grid_desc =
            make_naive_tensor_descriptor_packed(make_tuple(K, YX * C));

        const auto wei_gemmn_gemmk_grid_desc =
            matrix_padder.PadBDescriptor_N_K(wei_k_yxc_grid_desc);
@@ -854,7 +1121,10 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    template <typename BLay,
              typename std::enable_if<is_same_v<BLay, tensor_layout::convolution::G_K_X_C> ||
                                          is_same_v<BLay, tensor_layout::convolution::G_K_YX_C> ||
-                                         is_same_v<BLay, tensor_layout::convolution::G_K_ZYX_C>,
                                          is_same_v<BLay, tensor_layout::convolution::G_K_ZYX_C> ||
                                          is_same_v<BLay, tensor_layout::convolution::KXGC> ||
                                          is_same_v<BLay, tensor_layout::convolution::KYXGC> ||
                                          is_same_v<BLay, tensor_layout::convolution::KZYXGC>,
                                      bool>::type = false>
    static auto
    MakeBGridDescriptor_N_K(const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths,
@@ -863,26 +1133,34 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t K = b_g_k_c_xs_lengths[1];
        const index_t C = b_g_k_c_xs_lengths[2];

-        const index_t GemmNRaw = K;
-
-        const index_t GemmKRaw =
-            C * std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
-                                b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const auto wei_k_yxc_grid_desc =
-            make_naive_tensor_descriptor_packed(make_tuple(GemmNRaw, GemmKRaw));
        const index_t YX = std::accumulate(b_g_k_c_xs_lengths.begin() + 3,
                                           b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial,
                                           index_t{1},
                                           std::multiplies<index_t>());

        const index_t KStride = b_g_k_c_xs_strides[1];
        const index_t XStride = b_g_k_c_xs_strides[2 + NDimSpatial];
        const auto CStride    = I1;

        const auto wei_k_yx_c_grid_desc = make_naive_tensor_descriptor(
            make_tuple(K, YX, C), make_tuple(KStride, XStride, CStride));

        const auto wei_gemmnraw_gemmkraw_grid_desc = transform_tensor_descriptor(
            wei_k_yx_c_grid_desc,
            make_tuple(make_pass_through_transform(K), make_merge_transform(make_tuple(YX, C))),
            make_tuple(Sequence<0>{}, Sequence<1, 2>{}),
            make_tuple(Sequence<0>{}, Sequence<1>{}));

        const auto wei_gemmn_gemmk_grid_desc =
-            matrix_padder.PadBDescriptor_N_K(wei_k_yxc_grid_desc);
            matrix_padder.PadBDescriptor_N_K(wei_gemmnraw_gemmkraw_grid_desc);

        return wei_gemmn_gemmk_grid_desc;
    }
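For the strided weight view above, GemmN enumerates k and GemmK enumerates the merged (yx, c) pair. A hedged sketch of the element-offset arithmetic this descriptor represents (the function and parameter names are illustrative; strides are in elements):

    #include <cstddef>

    inline std::size_t wei_offset(std::size_t k, std::size_t yx, std::size_t c,
                                  std::size_t k_stride, // b_g_k_c_xs_strides[1]
                                  std::size_t x_stride, // b_g_k_c_xs_strides[2 + NDimSpatial]
                                  std::size_t c_stride) // I1: C is packed innermost
    {
        // GEMM coordinate (n, k) = (k, yx * C + c) after the merge transform
        return k * k_stride + yx * x_stride + c * c_stride;
    }

In the packed overload this collapses to a contiguous (K, YX * C) matrix, which is why that path can use make_naive_tensor_descriptor_packed directly.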
    template <typename ELay,
-             typename std::enable_if<is_same_v<ELay, tensor_layout::convolution::NWK> ||
-                                         is_same_v<ELay, tensor_layout::convolution::NHWK> ||
-                                         is_same_v<ELay, tensor_layout::convolution::NDHWK>,
              typename std::enable_if<is_same_v<ELay, tensor_layout::convolution::GNWK> ||
                                          is_same_v<ELay, tensor_layout::convolution::GNHWK> ||
                                          is_same_v<ELay, tensor_layout::convolution::GNDHWK>,
                                      bool>::type = false>
    static auto
    MakeEGridDescriptor_M_N(const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_lengths,
@@ -891,15 +1169,13 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        const index_t N = e_g_n_k_wos_lengths[1];
        const index_t K = e_g_n_k_wos_lengths[2];

-        const index_t GemmMRaw =
-            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
-                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const index_t GemmNRaw = K;
-
-        const auto out_gemmmraw_gemmnraw_grid_desc =
-            make_naive_tensor_descriptor_packed(make_tuple(GemmMRaw, GemmNRaw));
        const index_t NHoWo =
            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                index_t{1},
                                std::multiplies<index_t>());

        const auto out_gemmmraw_gemmnraw_grid_desc =
            make_naive_tensor_descriptor_packed(make_tuple(NHoWo, K));

        const auto out_gemmm_gemmn_grid_desc =
            matrix_padder.PadCDescriptor_M_N(out_gemmmraw_gemmnraw_grid_desc);
@@ -908,30 +1184,30 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
    }

    template <typename ELay,
-             typename std::enable_if<is_same_v<ELay, tensor_layout::convolution::NW_K> ||
-                                         is_same_v<ELay, tensor_layout::convolution::NHW_K> ||
-                                         is_same_v<ELay, tensor_layout::convolution::NDHW_K>,
              typename std::enable_if<is_same_v<ELay, tensor_layout::convolution::G_NW_K> ||
                                          is_same_v<ELay, tensor_layout::convolution::G_NHW_K> ||
                                          is_same_v<ELay, tensor_layout::convolution::G_NDHW_K> ||
                                          is_same_v<ELay, tensor_layout::convolution::NWGK> ||
                                          is_same_v<ELay, tensor_layout::convolution::NHWGK> ||
                                          is_same_v<ELay, tensor_layout::convolution::NDHWGK>,
                                      bool>::type = false>
    static auto
    MakeEGridDescriptor_M_N(const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_lengths,
                            const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_strides)
    {
        namespace ctc = ck::tensor_layout::convolution;

        const index_t N = e_g_n_k_wos_lengths[1];
        const index_t K = e_g_n_k_wos_lengths[2];

        const auto KStride     = I1;
        const index_t WoStride = e_g_n_k_wos_strides[NDimSpatial + 2];

-        const index_t GemmMRaw =
-            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
-                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
-                                index_t{1},
-                                std::multiplies<index_t>());
-
-        const index_t GemmNRaw = K;
-
-        const auto out_gemmmraw_gemmnraw_grid_desc =
-            make_naive_tensor_descriptor(make_tuple(GemmMRaw, GemmNRaw),
-                                         make_tuple(WoStride, I1));
        const index_t NHoWo =
            N * std::accumulate(e_g_n_k_wos_lengths.begin() + 3,
                                e_g_n_k_wos_lengths.begin() + 3 + NDimSpatial,
                                index_t{1},
                                std::multiplies<index_t>());

        const auto out_gemmmraw_gemmnraw_grid_desc =
            make_naive_tensor_descriptor(make_tuple(NHoWo, K), make_tuple(WoStride, KStride));

        const auto out_gemmm_gemmn_grid_desc =
            matrix_padder.PadCDescriptor_M_N(out_gemmmraw_gemmnraw_grid_desc);
@@ -1342,8 +1618,12 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        printf("%d\n", itmp++);

        // check vector access of A
-        if constexpr(is_same_v<ALayout, ctc::NWC> || is_same_v<ALayout, ctc::NHWC> ||
-                     is_same_v<ALayout, ctc::NDHWC>)
        // FIXME: layout
        if constexpr(is_same_v<ALayout, ctc::G_NW_C> || is_same_v<ALayout, ctc::G_NHW_C> ||
                     is_same_v<ALayout, ctc::G_NDHW_C> || is_same_v<ALayout, ctc::GNWC> ||
                     is_same_v<ALayout, ctc::GNHWC> || is_same_v<ALayout, ctc::GNDHWC> ||
                     is_same_v<ALayout, ctc::NWGC> || is_same_v<ALayout, ctc::NHWGC> ||
                     is_same_v<ALayout, ctc::NDHWGC>)
        {
            const index_t C = arg.a_g_n_c_wis_lengths_[2];
@@ -1360,8 +1640,13 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        printf("%d\n", itmp++);

        // check vector access of B
-        if constexpr(is_same_v<BLayout, ctc::KXC> || is_same_v<BLayout, ctc::KYXC> ||
-                     is_same_v<BLayout, ctc::KZYXC>)
        // FIXME: layout
        if constexpr(is_same_v<BLayout, ctc::G_K_X_C> || is_same_v<BLayout, ctc::G_K_YX_C> ||
                     is_same_v<BLayout, ctc::G_K_ZYX_C> || is_same_v<BLayout, ctc::GKXC> ||
                     is_same_v<BLayout, ctc::GKYXC> || is_same_v<BLayout, ctc::GKZYXC> ||
                     is_same_v<BLayout, ctc::KXGC> || is_same_v<BLayout, ctc::KYXGC> ||
                     is_same_v<BLayout, ctc::KZYXGC>)
        {
            const index_t C = arg.b_g_k_c_xs_lengths_[2];
@@ -1383,9 +1668,12 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        static_for<0, NumDTensor, 1>{}([&](auto i) {
            using DLayout = remove_cvref_t<tuple_element_t<i.value, DsLayout>>;

-            if constexpr(is_same_v<DLayout, ctc::NWK> || is_same_v<DLayout, ctc::NHWK> ||
-                         is_same_v<DLayout, ctc::NDHWK> || is_same_v<DLayout, ctc::NW_K> ||
-                         is_same_v<DLayout, ctc::NHW_K> || is_same_v<DLayout, ctc::NDHW_K>)
            // FIXME: layout
            if constexpr(is_same_v<DLayout, ctc::G_NW_K> || is_same_v<DLayout, ctc::G_NHW_K> ||
                         is_same_v<DLayout, ctc::G_NDHW_K> || is_same_v<DLayout, ctc::GNWK> ||
                         is_same_v<DLayout, ctc::GNHWK> || is_same_v<DLayout, ctc::GNDHWK> ||
                         is_same_v<DLayout, ctc::NWGK> || is_same_v<DLayout, ctc::NHWGK> ||
                         is_same_v<DLayout, ctc::NDHWGK>)
            {
                const index_t K = arg.ds_g_n_k_wos_lengths_[i][2];
@@ -1408,8 +1696,11 @@ struct DeviceConvFwdMultipleD_Xdl_CShuffle : public DeviceConvFwdMultipleD<NDimS
        printf("%d\n", itmp++);

        // check vector access of E
-        if constexpr(is_same_v<ELayout, ctc::NWK> || is_same_v<ELayout, ctc::NHWK> ||
-                     is_same_v<ELayout, ctc::NDHWK>)
        if constexpr(is_same_v<ELayout, ctc::G_NW_K> || is_same_v<ELayout, ctc::G_NHW_K> ||
                     is_same_v<ELayout, ctc::G_NDHW_K> || is_same_v<ELayout, ctc::GNWK> ||
                     is_same_v<ELayout, ctc::GNHWK> || is_same_v<ELayout, ctc::GNDHWK> ||
                     is_same_v<ELayout, ctc::NWGK> || is_same_v<ELayout, ctc::NHWGK> ||
                     is_same_v<ELayout, ctc::NDHWGK>)
        {
            const index_t K = arg.e_g_n_k_wos_lengths_[2];
include/ck/tensor_operation/gpu/device/tensor_layout.hpp

@@ -25,6 +25,39 @@ struct ColumnMajor : public BaseTensorLayout
namespace convolution {

// input tensor
// packed NCW/NCHW/NCDHW
struct NCW : public BaseTensorLayout
{
    static constexpr const char* name = "NCW";
};

struct NCHW : public BaseTensorLayout
{
    static constexpr const char* name = "NCHW";
};

struct NCDHW : public BaseTensorLayout
{
    static constexpr const char* name = "NCDHW";
};

// packed GNCW/GNCHW/GNCDHW
struct GNCW : public BaseTensorLayout
{
    static constexpr const char* name = "GNCW";
};

struct GNCHW : public BaseTensorLayout
{
    static constexpr const char* name = "GNCHW";
};

struct GNCDHW : public BaseTensorLayout
{
    static constexpr const char* name = "GNCDHW";
};

// input tensor
// packed NWC/NHWC/NDHWC
struct NWC : public BaseTensorLayout
@@ -43,20 +76,88 @@ struct NDHWC : public BaseTensorLayout
};

// input tensor
-// packed NCW/NCHW/NCDHW
-struct NCW : public BaseTensorLayout
-{
-    static constexpr const char* name = "NCW";
-};
-
-struct NCHW : public BaseTensorLayout
-{
-    static constexpr const char* name = "NCHW";
-};
-
-struct NCDHW : public BaseTensorLayout
-{
-    static constexpr const char* name = "NCDHW";
-};
// packed GNWC/GNHWC/GNDHWC
struct GNWC : public BaseTensorLayout
{
    static constexpr const char* name = "GNWC";
};

struct GNHWC : public BaseTensorLayout
{
    static constexpr const char* name = "GNHWC";
};

struct GNDHWC : public BaseTensorLayout
{
    static constexpr const char* name = "GNDHWC";
};

// input tensor
// packed GNWC/GNHWC/GNDHWC
struct NWGC : public BaseTensorLayout
{
    static constexpr const char* name = "NWGC";
};

struct NHWGC : public BaseTensorLayout
{
    static constexpr const char* name = "NHWGC";
};

struct NDHWGC : public BaseTensorLayout
{
    static constexpr const char* name = "NDHWGC";
};

// input tensor
// strided layout
struct G_NW_C : public BaseTensorLayout
{
    static constexpr const char* name = "G_NW_C";
};

struct G_NHW_C : public BaseTensorLayout
{
    static constexpr const char* name = "G_NHW_C";
};

struct G_NDHW_C : public BaseTensorLayout
{
    static constexpr const char* name = "G_NDHW_C";
};

// weight tensor
// packed KCX/KCYX/KCZYX
struct KCX : public BaseTensorLayout
{
    static constexpr const char* name = "KCX";
};

struct KCYX : public BaseTensorLayout
{
    static constexpr const char* name = "KCYX";
};

struct KCZYX : public BaseTensorLayout
{
    static constexpr const char* name = "KCZYX";
};

// weight tensor
// packed KCX/KCYX/KCZYX
struct GKCX : public BaseTensorLayout
{
    static constexpr const char* name = "GKCX";
};

struct GKCYX : public BaseTensorLayout
{
    static constexpr const char* name = "GKCYX";
};

struct GKCZYX : public BaseTensorLayout
{
    static constexpr const char* name = "GKCZYX";
};

// weight tensor
@@ -77,20 +178,88 @@ struct KZYXC : public BaseTensorLayout
};

// weight tensor
-// packed KCX/KCYX/KCZYX
-struct KCX : public BaseTensorLayout
-{
-    static constexpr const char* name = "KCX";
-};
-
-struct KCYX : public BaseTensorLayout
-{
-    static constexpr const char* name = "KCYX";
-};
-
-struct KCZYX : public BaseTensorLayout
-{
-    static constexpr const char* name = "KCZYX";
-};
// packed GKXC/GKYXC/GKZYXC
struct GKXC : public BaseTensorLayout
{
    static constexpr const char* name = "GKXC";
};

struct GKYXC : public BaseTensorLayout
{
    static constexpr const char* name = "GKYXC";
};

struct GKZYXC : public BaseTensorLayout
{
    static constexpr const char* name = "GKZYXC";
};

// weight tensor
// packed KXGC/KYXGC/KZYXGC
struct KXGC : public BaseTensorLayout
{
    static constexpr const char* name = "KXGC";
};

struct KYXGC : public BaseTensorLayout
{
    static constexpr const char* name = "KYXGC";
};

struct KZYXGC : public BaseTensorLayout
{
    static constexpr const char* name = "KZYXGC";
};

// weight tensor
// strided
struct G_K_X_C : public BaseTensorLayout
{
    static constexpr const char* name = "G_K_X_C";
};

struct G_K_YX_C : public BaseTensorLayout
{
    static constexpr const char* name = "G_K_YX_C";
};

struct G_K_ZYX_C : public BaseTensorLayout
{
    static constexpr const char* name = "G_K_ZYX_C";
};

// output tensor
// packed NKW/NKHW/NKDHW
struct NKW : public BaseTensorLayout
{
    static constexpr const char* name = "NKW";
};

struct NKHW : public BaseTensorLayout
{
    static constexpr const char* name = "NKHW";
};

struct NKDHW : public BaseTensorLayout
{
    static constexpr const char* name = "NKDHW";
};

// output tensor
// packed GNKW/GNKHW/GNKDHW
struct GNKW : public BaseTensorLayout
{
    static constexpr const char* name = "GNKW";
};

struct GNKHW : public BaseTensorLayout
{
    static constexpr const char* name = "GNKHW";
};

struct GNKDHW : public BaseTensorLayout
{
    static constexpr const char* name = "GNKDHW";
};

// output tensor
@@ -111,37 +280,54 @@ struct NDHWK : public BaseTensorLayout
};

// output tensor
-// packed NKW/NKHW/NKDHW
-struct NKW : public BaseTensorLayout
-{
-    static constexpr const char* name = "NKW";
-};
-
-struct NKHW : public BaseTensorLayout
-{
-    static constexpr const char* name = "NKHW";
-};
-
-struct NKDHW : public BaseTensorLayout
-{
-    static constexpr const char* name = "NKDHW";
-};
// packed GNWK/GNHWK/GNDHWK
struct GNWK : public BaseTensorLayout
{
    static constexpr const char* name = "GNWK";
};

struct GNHWK : public BaseTensorLayout
{
    static constexpr const char* name = "GNHWK";
};

struct GNDHWK : public BaseTensorLayout
{
    static constexpr const char* name = "GNDHWK";
};

// output tensor
// packed NWGK/NHWGK/NDHWGK
struct NWGK : public BaseTensorLayout
{
    static constexpr const char* name = "NWGK";
};

struct NHWGK : public BaseTensorLayout
{
    static constexpr const char* name = "NHWGK";
};

struct NDHWGK : public BaseTensorLayout
{
    static constexpr const char* name = "NDHWGK";
};

// output tensor
// strided layout
-struct NW_K : public BaseTensorLayout
-{
-    static constexpr const char* name = "NW_K";
-};
struct G_NW_K : public BaseTensorLayout
{
    static constexpr const char* name = "G_NW_K";
};

-struct NHW_K : public BaseTensorLayout
-{
-    static constexpr const char* name = "NHW_K";
-};
struct G_NHW_K : public BaseTensorLayout
{
    static constexpr const char* name = "G_NHW_K";
};

-struct NDHW_K : public BaseTensorLayout
-{
-    static constexpr const char* name = "NDHW_K";
-};
struct G_NDHW_K : public BaseTensorLayout
{
    static constexpr const char* name = "G_NDHW_K";
};

} // namespace convolution
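The families above all describe the same logical G/N/C/spatial axes, just in different memory orders. As a hedged illustration (plain arithmetic, not CK API), the packed element strides for a 2-D input under two of these layouts, returned in logical (G, N, C, H, W) order:

    #include <array>
    #include <cstddef>

    struct Dims { std::size_t G, N, C, H, W; };

    // GNHWC memory order [G][N][H][W][C]: G is the slowest dimension.
    inline std::array<std::size_t, 5> gnhwc_strides(Dims d)
    {
        return {d.N * d.H * d.W * d.C, // G
                d.H * d.W * d.C,       // N
                1,                     // C (innermost)
                d.W * d.C,             // H
                d.C};                  // W
    }

    // NHWGC memory order [N][H][W][G][C]: G sits between the spatial dims and C.
    inline std::array<std::size_t, 5> nhwgc_strides(Dims d)
    {
        return {d.C,                   // G
                d.H * d.W * d.G * d.C, // N
                1,                     // C (innermost)
                d.W * d.G * d.C,       // H
                d.G * d.C};            // W
    }

The G_NHW_C family carries no packing assumption at all: each of G, N, the spatial dims, and C has an explicit, independently chosen stride, which is what the strided descriptor overloads earlier in this commit consume.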
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp

@@ -30,12 +30,8 @@ namespace host {
// operation.
// @tparam NumDimSpatial Number of spatial dimensions.
//
-// FIXME: only support NDimSpatial = 1 to 3; only support NCHW and NHWC layout.
-// Need to be more general
// tensor descriptor in GNCHW/GKCXY/GNKHW dimensional order
template <ck::index_t NumDimSpatial,
          typename InLayout,
          typename WeiLayout,
          typename OutLayout,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
@@ -91,7 +87,6 @@ struct ReferenceConvFwd : public device::BaseOperator
    float Run(const Argument& arg)
    {
-        // tensor descriptor in NCHW/KXYC/NKHW dimensional order
        if constexpr(NumDimSpatial == 1)
        {
            auto func = [&](auto g, auto n, auto k, auto wo) {
library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp

@@ -4,6 +4,7 @@
#pragma once

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/convolution_parameter.hpp"

@@ -11,109 +12,238 @@ namespace ck {
namespace utils {
namespace conv {
namespace detail {

template <typename OldLayout>
std::vector<std::size_t> get_layout_transpose_gnchw_to_old()
{
    if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNCW> ||
                 ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKCX> ||
                 ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNKW>)
    {
        return {0, 1, 2, 3};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNCHW> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKCYX> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNKHW>)
    {
        return {0, 1, 2, 3, 4};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNCDHW> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKCZYX> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNKDHW>)
    {
        return {0, 1, 2, 3, 4, 5};
    }
    if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNWC> ||
                 ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKXC> ||
                 ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNWK>)
    {
        return {0, 1, 3, 2};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNHWC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKYXC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNHWK>)
    {
        return {0, 1, 4, 2, 3};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNDHWC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKZYXC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNDHWK>)
    {
        return {0, 1, 5, 2, 3, 4};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NWGC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::KXGC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NWGK>)
    {
        return {2, 0, 3, 1};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NHWGC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::KYXGC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NHWGK>)
    {
        return {3, 0, 4, 1, 2};
    }
    else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NDHWGC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::KZYXGC> ||
                      ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NDHWGK>)
    {
        return {4, 0, 5, 1, 2, 3};
    }
    else
    {
        printf("%s\n", __func__);
        throw std::runtime_error("wrong! unsupported layout");
    }
}

} // namespace detail
// make tensor descriptor for packed input tensor, and order the dimension in the order of GNCHW
// regardless of physical layout
template <typename InLayout>
-HostTensorDescriptor get_input_host_tensor_descriptor(const ck::utils::conv::ConvParam& param)
HostTensorDescriptor make_input_host_tensor_descriptor_packed(const ck::utils::conv::ConvParam& param)
{
    std::vector<std::size_t> physical_lengths;

-    if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::NWC> ||
-                 ck::is_same_v<InLayout, ck::tensor_layout::convolution::NHWC> ||
-                 ck::is_same_v<InLayout, ck::tensor_layout::convolution::NDHWC>)
    if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNCW> ||
                 ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNCHW> ||
                 ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNCDHW>)
    {
-        std::vector<std::size_t> nhwc_lengths{static_cast<std::size_t>(param.N_),
-                                              static_cast<std::size_t>(param.C_)};
-
-        nhwc_lengths.insert(nhwc_lengths.begin() + 1,
-                            param.input_spatial_lengths_.begin(),
-                            param.input_spatial_lengths_.end());
-
-        return HostTensorDescriptor(nhwc_lengths);
        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
                                                    static_cast<std::size_t>(param.N_),
                                                    static_cast<std::size_t>(param.C_)};

        physical_lengths.insert(physical_lengths.end(),
                                param.input_spatial_lengths_.begin(),
                                param.input_spatial_lengths_.begin() + param.num_dim_spatial_);
    }
-    else if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::NCW> ||
-                      ck::is_same_v<InLayout, ck::tensor_layout::convolution::NCHW> ||
-                      ck::is_same_v<InLayout, ck::tensor_layout::convolution::NCDHW>)
    else if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNWC> ||
                      ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNHWC> ||
                      ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNDHWC>)
    {
-        std::vector<std::size_t> nchw_lengths{static_cast<std::size_t>(param.N_),
-                                              static_cast<std::size_t>(param.C_)};
-
-        nchw_lengths.insert(nchw_lengths.end(),
-                            param.input_spatial_lengths_.begin(),
-                            param.input_spatial_lengths_.end());
-
-        return HostTensorDescriptor(nchw_lengths);
        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
                                                    static_cast<std::size_t>(param.N_),
                                                    static_cast<std::size_t>(param.C_)};

        physical_lengths.insert(physical_lengths.begin() + 2,
                                param.input_spatial_lengths_.begin(),
                                param.input_spatial_lengths_.begin() + param.num_dim_spatial_);
    }
    else if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::NWGC> ||
                      ck::is_same_v<InLayout, ck::tensor_layout::convolution::NHWGC> ||
                      ck::is_same_v<InLayout, ck::tensor_layout::convolution::NDHWGC>)
    {
        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.N_),
                                                    static_cast<std::size_t>(param.G_),
                                                    static_cast<std::size_t>(param.C_)};

        physical_lengths.insert(physical_lengths.begin() + 1,
                                param.input_spatial_lengths_.begin(),
                                param.input_spatial_lengths_.begin() + param.num_dim_spatial_);
    }
    else
    {
        printf("%s\n", __func__);
        printf("%s\n", InLayout::name);
        throw std::runtime_error("wrong! unsupported layout");
    }

    return transpose_host_tensor_descriptor_given_new2old(
        HostTensorDescriptor(physical_lengths),
        detail::get_layout_transpose_gnchw_to_old<InLayout>());
}
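A hedged usage sketch of the helper above: it first builds the physical lengths for the requested layout, then permutes them into GNCHW order via the transpose map, so the returned descriptor is always indexed in G, N, C, spatial order. Construction of the ConvParam is elided; the function below is illustrative, not part of the commit.

    #include <cassert>
    #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"

    void check_layout_agnostic(const ck::utils::conv::ConvParam& conv_param)
    {
        using namespace ck::tensor_layout::convolution;

        const auto desc_nhwgc =
            ck::utils::conv::make_input_host_tensor_descriptor_packed<NHWGC>(conv_param);
        const auto desc_gnhwc =
            ck::utils::conv::make_input_host_tensor_descriptor_packed<GNHWC>(conv_param);

        // Same logical lengths {G, N, C, Hi, Wi} for both; only the strides
        // differ, according to where G and C sit in memory.
        assert(desc_nhwgc.GetLengths() == desc_gnhwc.GetLengths());
    }

This is what lets run_conv_fwd in the example keep one set of g/n/c/h/w-indexed host tensors while the device layout varies.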
// make tensor descriptor for packed weight tensor, and order the dimension in the order of GKCYX
// regardless of physical layout
 template <typename WeiLayout>
-HostTensorDescriptor get_weight_host_tensor_descriptor(const ck::utils::conv::ConvParam& param)
+HostTensorDescriptor
+make_weight_host_tensor_descriptor_packed(const ck::utils::conv::ConvParam& param)
 {
-    if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KXC> ||
-                 ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KYXC> ||
-                 ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KZYXC>)
+    std::vector<std::size_t> physical_lengths;
+
+    if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKCX> ||
+                 ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKCYX> ||
+                 ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKCZYX>)
     {
-        std::vector<std::size_t> kyxc_lengths{static_cast<std::size_t>(param.K_),
-                                              static_cast<std::size_t>(param.C_)};
-
-        kyxc_lengths.insert(kyxc_lengths.begin() + 1,
-                            param.filter_spatial_lengths_.begin(),
-                            param.filter_spatial_lengths_.end());
-
-        return HostTensorDescriptor(kyxc_lengths);
+        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
+                                                    static_cast<std::size_t>(param.K_),
+                                                    static_cast<std::size_t>(param.C_)};
+
+        physical_lengths.insert(physical_lengths.end(),
+                                param.filter_spatial_lengths_.begin(),
+                                param.filter_spatial_lengths_.begin() + param.num_dim_spatial_);
     }
-    else if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KCX> ||
-                      ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KCYX> ||
-                      ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KCZYX>)
+    else if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KXC> ||
+                      ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KYXC> ||
+                      ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KZYXC>)
     {
-        std::vector<std::size_t> kcyx_lengths{static_cast<std::size_t>(param.K_),
-                                              static_cast<std::size_t>(param.C_)};
-
-        kcyx_lengths.insert(kcyx_lengths.end(),
-                            param.filter_spatial_lengths_.begin(),
-                            param.filter_spatial_lengths_.end());
-
-        return HostTensorDescriptor(kcyx_lengths);
+        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
+                                                    static_cast<std::size_t>(param.K_),
+                                                    static_cast<std::size_t>(param.C_)};
+
+        physical_lengths.insert(physical_lengths.begin() + 2,
+                                param.filter_spatial_lengths_.begin(),
+                                param.filter_spatial_lengths_.begin() + param.num_dim_spatial_);
     }
+    else if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KXGC> ||
+                      ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KYXGC> ||
+                      ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KZYXGC>)
+    {
+        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.K_),
+                                                    static_cast<std::size_t>(param.G_),
+                                                    static_cast<std::size_t>(param.C_)};
+
+        physical_lengths.insert(physical_lengths.begin() + 1,
+                                param.filter_spatial_lengths_.begin(),
+                                param.filter_spatial_lengths_.begin() + param.num_dim_spatial_);
+    }
     else
     {
         printf("%s\n", __func__);
         throw std::runtime_error("wrong! unsupported layout");
     }
+
+    return transpose_host_tensor_descriptor_given_new2old(
+        HostTensorDescriptor(physical_lengths),
+        detail::get_layout_transpose_gnchw_to_old<WeiLayout>());
 }
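With the weight helper above and the matching input/output helpers, host reference code can index every tensor in one logical dimension order and let the strides encode the layout. A hedged usage sketch follows; the field-by-field ConvParam setup is hypothetical (only the fields the helpers actually read are shown) and the sizes are illustrative:

// Hypothetical ConvParam setup -- the real construction may differ; only the
// fields read by the descriptor helpers above are populated here.
ck::utils::conv::ConvParam param;
param.G_                      = 4;  // groups
param.N_                      = 16; // batch
param.K_                      = 64; // output channels per group
param.C_                      = 32; // input channels per group
param.num_dim_spatial_        = 2;
param.input_spatial_lengths_  = {28, 28};
param.filter_spatial_lengths_ = {3, 3};
param.output_spatial_lengths_ = {26, 26}; // 3x3 filter, stride 1, no padding

using WeiLayout = ck::tensor_layout::convolution::KYXGC;

// Lengths come back in logical GKCYX order, {4, 64, 32, 3, 3}, while the
// strides describe the physical KYXGC placement in memory.
const auto wei_desc = make_weight_host_tensor_descriptor_packed<WeiLayout>(param);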
// make tensor descriptor for packed output tensor, and order the dimension in the order of GNKHW
// regardless of physical layout
 template <typename OutLayout>
-HostTensorDescriptor get_output_host_tensor_descriptor(const ck::utils::conv::ConvParam& param)
+HostTensorDescriptor
+make_output_host_tensor_descriptor_packed(const ck::utils::conv::ConvParam& param)
 {
-    if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NWK> ||
-                 ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NHWK> ||
-                 ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NDHWK>)
+    std::vector<std::size_t> physical_lengths;
+
+    if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNKW> ||
+                 ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNKHW> ||
+                 ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNKDHW>)
     {
-        std::vector<std::size_t> nhwk_lengths{static_cast<std::size_t>(param.N_),
-                                              static_cast<std::size_t>(param.K_)};
-
-        nhwk_lengths.insert(nhwk_lengths.begin() + 1,
-                            param.output_spatial_lengths_.begin(),
-                            param.output_spatial_lengths_.end());
-
-        return HostTensorDescriptor(nhwk_lengths);
+        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
+                                                    static_cast<std::size_t>(param.N_),
+                                                    static_cast<std::size_t>(param.K_)};
+
+        physical_lengths.insert(physical_lengths.end(),
+                                param.output_spatial_lengths_.begin(),
+                                param.output_spatial_lengths_.begin() + param.num_dim_spatial_);
     }
-    else if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NKW> ||
-                      ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NKHW> ||
-                      ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NKDHW>)
+    else if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNWK> ||
+                      ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNHWK> ||
+                      ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNDHWK>)
     {
-        std::vector<std::size_t> nkhw_lengths{static_cast<std::size_t>(param.N_),
-                                              static_cast<std::size_t>(param.K_)};
-
-        nkhw_lengths.insert(nkhw_lengths.end(),
-                            param.output_spatial_lengths_.begin(),
-                            param.output_spatial_lengths_.end());
-
-        return HostTensorDescriptor(nkhw_lengths);
+        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
+                                                    static_cast<std::size_t>(param.N_),
+                                                    static_cast<std::size_t>(param.K_)};
+
+        physical_lengths.insert(physical_lengths.begin() + 2,
+                                param.output_spatial_lengths_.begin(),
+                                param.output_spatial_lengths_.begin() + param.num_dim_spatial_);
     }
+    else if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NWGK> ||
+                      ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NHWGK> ||
+                      ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NDHWGK>)
+    {
+        physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.N_),
+                                                    static_cast<std::size_t>(param.G_),
+                                                    static_cast<std::size_t>(param.K_)};
+
+        physical_lengths.insert(physical_lengths.begin() + 1,
+                                param.output_spatial_lengths_.begin(),
+                                param.output_spatial_lengths_.begin() + param.num_dim_spatial_);
+    }
     else
     {
         printf("%s\n", __func__);
         throw std::runtime_error("wrong! unsupported layout");
     }
+
+    return transpose_host_tensor_descriptor_given_new2old(
+        HostTensorDescriptor(physical_lengths),
+        detail::get_layout_transpose_gnchw_to_old<OutLayout>());
 }
} // namespace conv
...
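Each helper above defers the actual reordering to detail::get_layout_transpose_gnchw_to_old<Layout>(), whose job is to return, for every logical dimension, its position in the physical layout that was just built. A sketch of what that mapping must contain for three of the output layouts handled above; the function name and the std::vector<std::size_t> return type are simplifications of the real helper, which covers every layout tag:

// Illustrative stand-in for detail::get_layout_transpose_gnchw_to_old:
// for each logical GNKHW dimension, where does it sit in the physical layout?
template <typename OutLayout>
std::vector<std::size_t> layout_transpose_gnkhw_to_old_sketch()
{
    namespace tl = ck::tensor_layout::convolution;

    if constexpr(ck::is_same_v<OutLayout, tl::GNKHW>)
        return {0, 1, 2, 3, 4}; // physical order is already G, N, K, H, W
    else if constexpr(ck::is_same_v<OutLayout, tl::GNHWK>)
        return {0, 1, 4, 2, 3}; // K is stored last: G, N, H, W, K
    else if constexpr(ck::is_same_v<OutLayout, tl::NHWGK>)
        return {3, 0, 4, 1, 2}; // physical order is N, H, W, G, K
    else
        throw std::runtime_error("layout not covered in this sketch");
}

For NHWGK, for example, logical G, N, K, H, W sit at physical positions 3, 0, 4, 1, 2, which is exactly the permutation the NHWGK branch above relies on.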
library/include/ck/library/utility/host_tensor.hpp
View file @ 3549e344
...
@@ -358,6 +358,19 @@ struct Tensor
                                    mDesc.GetLengths()[4])(num_thread);
         break;
     }
+    case 6: {
+        auto f = [&](auto i0, auto i1, auto i2, auto i3, auto i4, auto i5) {
+            (*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
+        };
+        make_ParallelTensorFunctor(f,
+                                   mDesc.GetLengths()[0],
+                                   mDesc.GetLengths()[1],
+                                   mDesc.GetLengths()[2],
+                                   mDesc.GetLengths()[3],
+                                   mDesc.GetLengths()[4],
+                                   mDesc.GetLengths()[5])(num_thread);
+        break;
+    }
     default: throw std::runtime_error("unsupported dimension");
     }
 }
...
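The new case 6 matters for this commit because a grouped 3-D convolution tensor (G, N, C, D, H, W) is rank-6, and the added lambda forwards all six coordinates to both the tensor and the generator. A hedged usage sketch; the descriptor sizes and the generator are illustrative, and GenerateTensorValue is assumed to be the value-generation member this switch lives in:

// Rank-6 host tensor for a grouped 3-D convolution input: G, N, C, D, H, W.
Tensor<float> in(HostTensorDescriptor(
    std::vector<std::size_t>{2 /*G*/, 4 /*N*/, 8 /*C*/, 3 /*D*/, 5 /*H*/, 5 /*W*/}));

// With case 6 in place, a six-argument generator is dispatched over every
// coordinate tuple, split across num_thread workers.
in.GenerateTensorValue(
    [](auto g, auto n, auto c, auto d, auto h, auto w) {
        return static_cast<float>((g + n + c + d + h + w) % 5);
    },
    4 /*num_thread*/);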