Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
54f914c6
Commit
54f914c6
authored
Oct 01, 2024
by
Astha Rai
Browse files
added header guards/replicated functionality in device files
parent
7d9969ab
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
83 additions
and
63 deletions
+83
-63
include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp
...eration/gpu/device/convolution_forward_specialization.hpp
+4
-0
include/ck/tensor_operation/gpu/device/gemm_specialization.hpp
...de/ck/tensor_operation/gpu/device/gemm_specialization.hpp
+2
-0
include/ck/tensor_operation/gpu/device/impl/codegen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
...gen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
+2
-2
include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp
...eration/operator_transform/transform_conv_fwd_to_gemm.hpp
+73
-61
include/ck/utility/type_convert.hpp
include/ck/utility/type_convert.hpp
+2
-0
No files found.
include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp
View file @
54f914c6
...
...
@@ -3,7 +3,9 @@
#pragma once
#ifndef CK_CODE_GEN_RTC
#include <string>
#endif
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -18,6 +20,7 @@ enum struct ConvolutionForwardSpecialization
Filter3x3
,
};
#ifndef CK_CODE_GEN_RTC
inline
std
::
string
getConvForwardSpecializationString
(
const
ConvolutionForwardSpecialization
&
s
)
{
switch
(
s
)
...
...
@@ -30,6 +33,7 @@ inline std::string getConvForwardSpecializationString(const ConvolutionForwardSp
default:
return
"Unrecognized specialization!"
;
}
}
#endif
}
// namespace device
}
// namespace tensor_operation
...
...
include/ck/tensor_operation/gpu/device/gemm_specialization.hpp
View file @
54f914c6
...
...
@@ -29,6 +29,7 @@ enum struct GemmSpecialization
MNKOPadding
,
};
#ifndef CK_CODE_GEN_RTC
inline
std
::
string
getGemmSpecializationString
(
const
GemmSpecialization
&
s
)
{
switch
(
s
)
...
...
@@ -52,6 +53,7 @@ inline std::string getGemmSpecializationString(const GemmSpecialization& s)
default:
return
"Unrecognized specialization!"
;
}
}
#endif
}
// namespace device
}
// namespace tensor_operation
...
...
include/ck/tensor_operation/gpu/device/impl/codegen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
View file @
54f914c6
...
...
@@ -20,13 +20,13 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp"
#include "ck/host_utility/io.hpp"
//
#include "ck/host_utility/io.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp
View file @
54f914c6
...
...
@@ -148,8 +148,8 @@ struct TransformConvFwdToGemm
template
<
typename
ConvDimsType
,
typename
ConvSpatialDimsType
,
index_t
NDim
=
NDimSpatial
,
typename
std
::
enable_if
<
NDim
==
1
,
bool
>
::
type
=
false
>
index_t
NDim
=
NDimSpatial
,
typename
ck
::
enable_if
<
NDim
==
1
,
bool
>
::
type
=
false
>
__host__
__device__
TransformConvFwdToGemm
(
const
ConvDimsType
&
a_g_n_c_wis_lengths
,
const
ConvDimsType
&
a_g_n_c_wis_strides
,
const
ConvDimsType
&
b_g_k_c_xs_lengths
,
...
...
@@ -201,11 +201,15 @@ struct TransformConvFwdToGemm
InRightPadW_
{
input_right_pads
[
I0
]},
ZYX_
{
X_
}
{
#ifdef CK_CODE_GEN_RTC
static_assert
(
is_same_v
<
ConvSpatialDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
>>
);
static_assert
(
is_same_v
<
ConvDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
+
I3
>>
);
#else
static_assert
(
is_same_v
<
ConvSpatialDimsType
,
std
::
array
<
IndexType
,
NDimSpatial
>>
||
is_same_v
<
ConvSpatialDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
>>
);
static_assert
(
is_same_v
<
ConvDimsType
,
std
::
array
<
IndexType
,
NDimSpatial
+
I3
>>
||
is_same_v
<
ConvDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
+
I3
>>
);
#endif
if
constexpr
(
SplitN
)
{
N_
=
GetSplitedNSize
(
...
...
@@ -219,8 +223,8 @@ struct TransformConvFwdToGemm
template
<
typename
ConvDimsType
,
typename
ConvSpatialDimsType
,
index_t
NDim
=
NDimSpatial
,
typename
std
::
enable_if
<
NDim
==
2
,
bool
>
::
type
=
false
>
index_t
NDim
=
NDimSpatial
,
typename
ck
::
enable_if
<
NDim
==
2
,
bool
>
::
type
=
false
>
__host__
__device__
TransformConvFwdToGemm
(
const
ConvDimsType
&
a_g_n_c_wis_lengths
,
const
ConvDimsType
&
a_g_n_c_wis_strides
,
const
ConvDimsType
&
b_g_k_c_xs_lengths
,
...
...
@@ -272,11 +276,15 @@ struct TransformConvFwdToGemm
InRightPadW_
{
input_right_pads
[
I1
]},
ZYX_
{
Y_
*
X_
}
{
#ifdef CK_CODE_GEN_RTC
static_assert
(
is_same_v
<
ConvSpatialDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
>>
);
static_assert
(
is_same_v
<
ConvDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
+
I3
>>
);
#else
static_assert
(
is_same_v
<
ConvSpatialDimsType
,
std
::
array
<
IndexType
,
NDimSpatial
>>
||
is_same_v
<
ConvSpatialDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
>>
);
static_assert
(
is_same_v
<
ConvDimsType
,
std
::
array
<
IndexType
,
NDimSpatial
+
I3
>>
||
is_same_v
<
ConvDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
+
I3
>>
);
#endif
if
constexpr
(
SplitN
)
{
N_
=
GetSplitedNSize
(
...
...
@@ -290,8 +298,8 @@ struct TransformConvFwdToGemm
template
<
typename
ConvDimsType
,
typename
ConvSpatialDimsType
,
index_t
NDim
=
NDimSpatial
,
typename
std
::
enable_if
<
NDim
==
3
,
bool
>
::
type
=
false
>
index_t
NDim
=
NDimSpatial
,
typename
ck
::
enable_if
<
NDim
==
3
,
bool
>
::
type
=
false
>
__host__
__device__
TransformConvFwdToGemm
(
const
ConvDimsType
&
a_g_n_c_wis_lengths
,
const
ConvDimsType
&
a_g_n_c_wis_strides
,
const
ConvDimsType
&
b_g_k_c_xs_lengths
,
...
...
@@ -343,11 +351,15 @@ struct TransformConvFwdToGemm
InRightPadW_
{
input_right_pads
[
I2
]},
ZYX_
{
Z_
*
Y_
*
X_
}
{
#ifdef CK_CODE_GEN_RTC
static_assert
(
is_same_v
<
ConvSpatialDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
>>
);
static_assert
(
is_same_v
<
ConvDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
+
I3
>>
);
#else
static_assert
(
is_same_v
<
ConvSpatialDimsType
,
std
::
array
<
IndexType
,
NDimSpatial
>>
||
is_same_v
<
ConvSpatialDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
>>
);
static_assert
(
is_same_v
<
ConvDimsType
,
std
::
array
<
IndexType
,
NDimSpatial
+
I3
>>
||
is_same_v
<
ConvDimsType
,
ck
::
Array
<
IndexType
,
NDimSpatial
+
I3
>>
);
#endif
if
constexpr
(
SplitN
)
{
N_
=
GetSplitedNSize
(
...
...
@@ -478,11 +490,11 @@ struct TransformConvFwdToGemm
// TODO: implement ck::tensor_layout::convolution that describes packed/strided dimensions as
// properties
template
<
typename
ALayout
,
typename
std
::
enable_if
<
NDimSpatial
==
1
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NWGC
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNWC
>
),
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
NDimSpatial
==
1
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NWGC
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNWC
>
),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeADescriptor_M_K
()
const
{
if
constexpr
(
ConvForwardSpecialization
==
...
...
@@ -691,11 +703,11 @@ struct TransformConvFwdToGemm
}
template
<
typename
ALayout
,
typename
std
::
enable_if
<
NDimSpatial
==
2
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NHW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NHWGC
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNHWC
>
),
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
NDimSpatial
==
2
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NHW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NHWGC
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNHWC
>
),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeADescriptor_M_K
()
const
{
...
...
@@ -932,7 +944,7 @@ struct TransformConvFwdToGemm
}
template
<
typename
ALayout
,
typename
std
::
enable_if
<
typename
ck
::
enable_if
<
NDimSpatial
==
3
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NDHW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NDHWGC
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNDHWC
>
),
...
...
@@ -1242,19 +1254,19 @@ struct TransformConvFwdToGemm
}
template
<
typename
BLayout
,
typename
std
::
enable_if
<
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKXC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKYXC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKZYXC
>
,
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKXC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKYXC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKZYXC
>
,
bool
>::
type
=
false
>
__host__
__device__
auto
MakeBDescriptor_N_K
()
const
{
if
constexpr
(
ConvForwardSpecialization
==
device
::
ConvolutionForwardSpecialization
::
Filter3x3
)
{
using
FilterSizeNumType
=
std
::
conditional_t
<
NDimSpatial
==
1
,
Number
<
3
>
,
std
::
conditional_t
<
NDimSpatial
==
2
,
Number
<
9
>
,
Number
<
27
>>>
;
ck
::
conditional_t
<
NDimSpatial
==
1
,
Number
<
3
>
,
ck
::
conditional_t
<
NDimSpatial
==
2
,
Number
<
9
>
,
Number
<
27
>>>
;
if
constexpr
(
NumGroupsToMerge
==
1
)
{
...
...
@@ -1297,13 +1309,13 @@ struct TransformConvFwdToGemm
template
<
typename
BLayout
,
typename
std
::
enable_if
<
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
G_K_X_C
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
G_K_YX_C
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
G_K_ZYX_C
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KXGC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KYXGC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KZYXGC
>
,
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
G_K_X_C
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
G_K_YX_C
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
G_K_ZYX_C
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KXGC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KYXGC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KZYXGC
>
,
bool
>::
type
=
false
>
__host__
__device__
auto
MakeBDescriptor_N_K
()
const
{
const
auto
wei_k_yx_c_desc
=
make_naive_tensor_descriptor
(
...
...
@@ -1318,36 +1330,36 @@ struct TransformConvFwdToGemm
return
wei_gemmn_gemmk_desc
;
}
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
typename
std
::
enable_if
<
NDimSp
==
1
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>),
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
NDimSp
==
1
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeCDescriptor_M_N
()
const
{
return
make_naive_tensor_descriptor
(
make_tuple
(
N_
*
Wo_
,
K_
),
make_tuple
(
I0
,
KStrideTensorC_
));
}
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
typename
std
::
enable_if
<
NDimSp
==
2
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>),
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
NDimSp
==
2
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeCDescriptor_M_N
()
const
{
return
make_naive_tensor_descriptor
(
make_tuple
(
N_
*
Ho_
*
Wo_
,
K_
),
make_tuple
(
I0
,
KStrideTensorC_
));
}
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
typename
std
::
enable_if
<
NDimSp
==
3
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>),
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
NDimSp
==
3
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeCDescriptor_M_N
()
const
{
return
make_naive_tensor_descriptor
(
make_tuple
(
N_
*
Do_
*
Ho_
*
Wo_
,
K_
),
...
...
@@ -1355,12 +1367,12 @@ struct TransformConvFwdToGemm
}
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
typename
std
::
enable_if
<
NDimSp
==
1
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_NW_K
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NWGK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNWK
>
),
bool
>::
type
=
false
>
index_t
NDimSp
=
NDimSpatial
,
typename
ck
::
enable_if
<
NDimSp
==
1
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_NW_K
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NWGK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNWK
>
),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeCDescriptor_M_N
()
const
{
const
IndexType
NDoHoWo
=
N_
*
Wo_
;
...
...
@@ -1410,11 +1422,11 @@ struct TransformConvFwdToGemm
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
typename
std
::
enable_if
<
NDimSp
==
2
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_NHW_K
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NHWGK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNHWK
>
),
bool
>::
type
=
false
>
typename
ck
::
enable_if
<
NDimSp
==
2
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_NHW_K
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NHWGK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNHWK
>
),
bool
>::
type
=
false
>
__host__
__device__
auto
MakeCDescriptor_M_N
()
const
{
const
IndexType
NDoHoWo
=
N_
*
Ho_
*
Wo_
;
...
...
@@ -1467,7 +1479,7 @@ struct TransformConvFwdToGemm
template
<
typename
CLayout
,
index_t
NDimSp
=
NDimSpatial
,
typename
std
::
enable_if
<
typename
ck
::
enable_if
<
NDimSp
==
3
&&
(
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_NDHW_K
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NDHWGK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNDHWK
>
),
...
...
include/ck/utility/type_convert.hpp
View file @
54f914c6
...
...
@@ -501,6 +501,7 @@ inline __host__ __device__ half_t type_convert<half_t, bf8_t>(bf8_t x)
#endif
}
#ifndef CK_CODE_GEN_RTC
template
<
typename
Y
,
typename
X
,
size_t
NumElems
>
inline
__host__
__device__
void
array_convert
(
std
::
array
<
Y
,
NumElems
>&
y
,
const
std
::
array
<
X
,
NumElems
>&
x
)
...
...
@@ -510,6 +511,7 @@ inline __host__ __device__ void array_convert(std::array<Y, NumElems>& y,
y
[
i
]
=
type_convert
<
Y
>
(
x
[
i
]);
}
}
#endif
template
<
typename
Y
,
typename
X
,
index_t
NumElems
>
inline
__host__
__device__
void
array_convert
(
Array
<
Y
,
NumElems
>&
y
,
const
Array
<
X
,
NumElems
>&
x
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment