Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
16effa76
"driver/src/conv_driver.cpp" did not exist on "51a9fa1dbddd74558472881f616a7fb79759fd24"
Commit
16effa76
authored
Aug 16, 2021
by
Chao Liu
Browse files
refactor
parent
a91b68df
Changes
19
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
99 additions
and
91 deletions
+99
-91
CMakeLists.txt
CMakeLists.txt
+1
-0
composable_kernel/include/tensor_description/multi_index_transform.hpp
...rnel/include/tensor_description/multi_index_transform.hpp
+1
-1
composable_kernel/include/tensor_description/multi_index_transform_helper.hpp
...clude/tensor_description/multi_index_transform_helper.hpp
+1
-1
composable_kernel/include/tensor_description/tensor_adaptor.hpp
...able_kernel/include/tensor_description/tensor_adaptor.hpp
+1
-3
composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp
...l/include/tensor_description/tensor_descriptor_helper.hpp
+1
-1
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v2r2.hpp
...el/include/tensor_operation/blockwise_gemm_dlops_v2r2.hpp
+18
-18
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v2r3.hpp
...el/include/tensor_operation/blockwise_gemm_dlops_v2r3.hpp
+3
-3
composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
...include/tensor_operation/threadwise_contraction_dlops.hpp
+8
-8
composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
...nel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
+3
-3
composable_kernel/include/tensor_operation/threadwise_tensor_slice_set.hpp
.../include/tensor_operation/threadwise_tensor_slice_set.hpp
+1
-1
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer.hpp
...ude/tensor_operation/threadwise_tensor_slice_transfer.hpp
+13
-14
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v2.hpp
.../tensor_operation/threadwise_tensor_slice_transfer_v2.hpp
+10
-11
composable_kernel/include/utility/c_style_pointer_cast.hpp
composable_kernel/include/utility/c_style_pointer_cast.hpp
+2
-1
composable_kernel/include/utility/common_header.hpp
composable_kernel/include/utility/common_header.hpp
+1
-0
composable_kernel/include/utility/dynamic_buffer.hpp
composable_kernel/include/utility/dynamic_buffer.hpp
+3
-2
composable_kernel/include/utility/enable_if.hpp
composable_kernel/include/utility/enable_if.hpp
+13
-0
composable_kernel/include/utility/math.hpp
composable_kernel/include/utility/math.hpp
+3
-6
composable_kernel/include/utility/tuple.hpp
composable_kernel/include/utility/tuple.hpp
+14
-15
composable_kernel/include/utility/type.hpp
composable_kernel/include/utility/type.hpp
+2
-3
No files found.
CMakeLists.txt
View file @
16effa76
...
...
@@ -43,6 +43,7 @@ message(STATUS "Build with HIP ${hip_VERSION}")
message
(
"HALF_INCLUDE_DIR:
${
HALF_INCLUDE_DIR
}
"
)
# CMAKE_CXX_FLAGS
SET
(
BUILD_DEV ON CACHE BOOL
"BUILD_DEV"
)
if
(
BUILD_DEV
)
string
(
APPEND CMAKE_CXX_FLAGS
" -Werror -Weverything"
)
endif
()
...
...
composable_kernel/include/tensor_description/multi_index_transform.hpp
View file @
16effa76
...
...
@@ -377,7 +377,7 @@ struct RightPad
// at compile-time
template
<
typename
UpLengths
,
typename
Coefficients
,
typename
std
::
enable_if
<
UpLengths
::
Size
()
==
Coefficients
::
Size
(),
bool
>
::
type
=
false
>
typename
enable_if
<
UpLengths
::
Size
()
==
Coefficients
::
Size
(),
bool
>
::
type
=
false
>
struct
Embed
{
static
constexpr
index_t
NDimUp
=
UpLengths
::
Size
();
...
...
composable_kernel/include/tensor_description/multi_index_transform_helper.hpp
View file @
16effa76
...
...
@@ -42,7 +42,7 @@ __host__ __device__ constexpr auto make_right_pad_transform(
template
<
typename
UpLengths
,
typename
Coefficients
,
typename
std
::
enable_if
<
UpLengths
::
Size
()
==
Coefficients
::
Size
(),
bool
>
::
type
=
false
>
typename
enable_if
<
UpLengths
::
Size
()
==
Coefficients
::
Size
(),
bool
>
::
type
=
false
>
__host__
__device__
constexpr
auto
make_embed_transform
(
const
UpLengths
&
up_lengths
,
const
Coefficients
&
coefficients
)
{
...
...
composable_kernel/include/tensor_description/tensor_adaptor.hpp
View file @
16effa76
...
...
@@ -454,9 +454,7 @@ __host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transf
remove_cv_t
<
decltype
(
top_dim_hidden_ids
)
>>
{
transforms
};
}
template
<
typename
X
,
typename
...
Xs
,
typename
std
::
enable_if
<
sizeof
...(
Xs
)
>
=
2
,
bool
>::
type
=
false
>
template
<
typename
X
,
typename
...
Xs
,
typename
enable_if
<
sizeof
...(
Xs
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
chain_tensor_adaptors
(
const
X
&
x
,
const
Xs
&
...
xs
)
{
return
chain_tensor_adaptors
(
x
,
chain_tensor_adaptors
(
xs
...));
...
...
composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp
View file @
16effa76
...
...
@@ -37,7 +37,7 @@ __host__ __device__ constexpr auto calculate_element_space_size_impl(const Lengt
template
<
typename
...
Lengths
,
typename
...
Strides
,
typename
std
::
enable_if
<
sizeof
...(
Lengths
)
==
sizeof
...(
Strides
),
bool
>
::
type
=
false
>
typename
enable_if
<
sizeof
...(
Lengths
)
==
sizeof
...(
Strides
),
bool
>
::
type
=
false
>
__host__
__device__
constexpr
auto
make_naive_tensor_descriptor_v2
(
const
Tuple
<
Lengths
...
>&
lengths
,
const
Tuple
<
Strides
...
>&
strides
)
{
...
...
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v2r2.hpp
View file @
16effa76
...
...
@@ -22,7 +22,8 @@ namespace ck {
// 2. CThreadBuffer is StaticBuffer
// Also assume:
// M0 = N0 = 2. It will do 2x2 pipelined read and fma (ABBA optimization)
template
<
index_t
BlockSize
,
template
<
index_t
BlockSize
,
typename
FloatA
,
typename
FloatB
,
typename
FloatC
,
...
...
@@ -37,8 +38,7 @@ template <index_t BlockSize,
index_t
M1N1ThreadClusterN101
,
index_t
AThreadCopyScalarPerVector_M11
,
index_t
BThreadCopyScalarPerVector_N11
,
typename
std
::
enable_if
<
AKMBlockDesc
::
IsKnownAtCompileTime
()
&&
BKNBlockDesc
::
IsKnownAtCompileTime
(),
typename
enable_if
<
AKMBlockDesc
::
IsKnownAtCompileTime
()
&&
BKNBlockDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
BlockwiseGemmDlops_km_kn_m0m1n0n1_v2r2_pipeline_2x2
{
...
...
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v2r3.hpp
View file @
16effa76
...
...
@@ -38,7 +38,7 @@ template <index_t BlockSize,
// BM10BN10ThreadClusterBN101, ...>
index_t
AThreadCopyScalarPerVector_BM11
,
index_t
BThreadCopyScalarPerVector_BN11
,
typename
std
::
enable_if
<
ABlockDesc_BK0_BM_BK1
::
IsKnownAtCompileTime
()
&&
typename
enable_if
<
ABlockDesc_BK0_BM_BK1
::
IsKnownAtCompileTime
()
&&
BBlockDesc_BK0_BN_BK1
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
BlockwiseGemmDlops_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2
...
...
composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
View file @
16effa76
...
...
@@ -21,7 +21,7 @@ template <typename FloatA,
typename
TKLengths
,
typename
TMLengths
,
typename
TNLengths
,
typename
std
::
enable_if
<
AThreadDesc_TK0_TM0_TM1_TK1
::
IsKnownAtCompileTime
()
&&
typename
enable_if
<
AThreadDesc_TK0_TM0_TM1_TK1
::
IsKnownAtCompileTime
()
&&
BThreadDesc_TK0_TN0_TN1_TK1
::
IsKnownAtCompileTime
()
&&
CThreadDesc_TM0_TM1_TN0_TN1
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
...
...
@@ -123,7 +123,7 @@ template <typename FloatA,
typename
TKLengths
,
typename
TMLengths
,
typename
TNLengths
,
typename
std
::
enable_if
<
AThreadDesc_TK0_TM0_TM1_TK1
::
IsKnownAtCompileTime
()
&&
typename
enable_if
<
AThreadDesc_TK0_TM0_TM1_TK1
::
IsKnownAtCompileTime
()
&&
BThreadDesc_TK0_TN0_TN1_TK1
::
IsKnownAtCompileTime
()
&&
CThreadDesc_TM0_TM1_TN0_TN1
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
...
...
composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
View file @
16effa76
...
...
@@ -19,7 +19,7 @@ template <typename FloatA,
typename
CDesc
,
index_t
H
,
index_t
W
,
typename
std
::
enable_if
<
ADesc
::
IsKnownAtCompileTime
()
&&
BDesc
::
IsKnownAtCompileTime
()
&&
typename
enable_if
<
ADesc
::
IsKnownAtCompileTime
()
&&
BDesc
::
IsKnownAtCompileTime
()
&&
CDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseGemmDlops_km_kn_mn_v3
...
...
composable_kernel/include/tensor_operation/threadwise_tensor_slice_set.hpp
View file @
16effa76
...
...
@@ -15,7 +15,7 @@ namespace ck {
template
<
typename
Data
,
typename
Desc
,
typename
SliceLengths
,
typename
std
::
enable_if
<
Desc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
typename
enable_if
<
Desc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseTensorSliceSet_v1
{
static
constexpr
index_t
nDim
=
SliceLengths
::
Size
();
...
...
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer.hpp
View file @
16effa76
...
...
@@ -57,7 +57,7 @@ template <typename SrcData,
InMemoryDataOperationEnum_t
DstInMemOp
,
index_t
DstScalarStrideInVector
,
bool
DstResetCoordinateAfterRun
,
typename
std
::
enable_if
<
SrcDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
typename
enable_if
<
SrcDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseTensorSliceTransfer_v1r3
{
static
constexpr
index_t
nDim
=
SliceLengths
::
Size
();
...
...
@@ -373,7 +373,7 @@ template <typename SrcData,
index_t
SrcScalarPerVector
,
index_t
SrcScalarStrideInVector
,
bool
SrcResetCoordinateAfterRun
,
typename
std
::
enable_if
<
DstDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
typename
enable_if
<
DstDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseTensorSliceTransfer_v2
{
static
constexpr
index_t
nDim
=
SliceLengths
::
Size
();
...
...
@@ -1261,8 +1261,7 @@ struct ThreadwiseTensorSliceTransfer_v3
// 3. DstOriginIdx is known at compile-time
// 4. use direct address calculation
// 3. vector access on src
template
<
typename
SrcData
,
template
<
typename
SrcData
,
typename
DstData
,
typename
SrcDesc
,
typename
DstDesc
,
...
...
@@ -1271,7 +1270,7 @@ template <
index_t
SrcVectorDim
,
index_t
SrcScalarPerVector
,
index_t
SrcScalarStrideInVector
,
typename
std
::
enable_if
<
SrcDesc
::
IsKnownAtCompileTime
()
&&
DstDesc
::
IsKnownAtCompileTime
(),
typename
enable_if
<
SrcDesc
::
IsKnownAtCompileTime
()
&&
DstDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseTensorSliceTransfer_v4
{
...
...
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v2.hpp
View file @
16effa76
...
...
@@ -621,8 +621,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1
// 3. DstOriginIdx is known at compile-time
// 4. use direct address calculation
// 3. vector access on src
template
<
typename
SrcData
,
template
<
typename
SrcData
,
typename
DstData
,
typename
SrcDesc
,
typename
DstDesc
,
...
...
@@ -630,7 +629,7 @@ template <
typename
DimAccessOrder
,
typename
SrcVectorTensorLengths
,
typename
SrcVectorTensorContiguousDimOrder
,
typename
std
::
enable_if
<
SrcDesc
::
IsKnownAtCompileTime
()
&&
DstDesc
::
IsKnownAtCompileTime
(),
typename
enable_if
<
SrcDesc
::
IsKnownAtCompileTime
()
&&
DstDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseTensorSliceTransfer_v4r1
{
...
...
composable_kernel/include/utility/c_style_pointer_cast.hpp
View file @
16effa76
...
...
@@ -2,12 +2,13 @@
#define CK_C_STYLE_POINTER_CAST_HPP
#include "type.hpp"
#include "enable_if.hpp"
namespace
ck
{
template
<
typename
PY
,
typename
PX
,
typename
std
::
enable_if
<
is_pointer_v
<
PY
>
&&
is_pointer_v
<
PX
>
,
bool
>::
type
=
false
>
typename
enable_if
<
is_pointer_v
<
PY
>
&&
is_pointer_v
<
PX
>
,
bool
>::
type
=
false
>
__host__
__device__
PY
c_style_pointer_cast
(
PX
p_x
)
{
#pragma clang diagnostic push
...
...
composable_kernel/include/utility/common_header.hpp
View file @
16effa76
...
...
@@ -14,6 +14,7 @@
#include "functional2.hpp"
#include "functional3.hpp"
#include "functional4.hpp"
#include "enable_if.hpp"
#include "integral_constant.hpp"
#include "math.hpp"
#include "number.hpp"
...
...
composable_kernel/include/utility/dynamic_buffer.hpp
View file @
16effa76
...
...
@@ -3,6 +3,7 @@
#include "amd_buffer_addressing.hpp"
#include "c_style_pointer_cast.hpp"
#include "enable_if.hpp"
namespace
ck
{
...
...
@@ -38,7 +39,7 @@ struct DynamicBuffer
}
template
<
typename
X
,
typename
std
::
enable_if
<
typename
enable_if
<
is_same
<
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
X
>
>>::
type
,
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
T
>>>::
type
>::
value
,
bool
>::
type
=
false
>
...
...
@@ -93,7 +94,7 @@ struct DynamicBuffer
}
template
<
typename
X
,
typename
std
::
enable_if
<
typename
enable_if
<
is_same
<
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
X
>
>>::
type
,
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
T
>>>::
type
>::
value
,
bool
>::
type
=
false
>
...
...
composable_kernel/include/utility/enable_if.hpp
0 → 100644
View file @
16effa76
#ifndef CK_ENABLE_IF_HPP
#define CK_ENABLE_IF_HPP

// std::enable_if is declared in <type_traits>. Include it here so this header
// is self-contained and does not rely on the includer's include order.
#include <type_traits>

namespace ck {

// Alias of std::enable_if inside namespace ck.
// When B is true the aliased type provides a member typedef `type` equal to T
// (void by default); when B is false it provides no `type` member, which is
// what removes a template from overload resolution / specialization matching.
template <bool B, typename T = void>
using enable_if = std::enable_if<B, T>;

// Shorthand for `typename enable_if<B, T>::type`.
// Names T when B is true; is a substitution failure when B is false.
template <bool B, typename T = void>
using enable_if_t = typename std::enable_if<B, T>::type;

} // namespace ck
#endif
composable_kernel/include/utility/math.hpp
View file @
16effa76
...
...
@@ -5,6 +5,7 @@
#include "integral_constant.hpp"
#include "number.hpp"
#include "type.hpp"
#include "enable_if.hpp"
namespace
ck
{
namespace
math
{
...
...
@@ -184,9 +185,7 @@ __host__ __device__ constexpr auto gcd(Number<X>, Number<Y>)
return
Number
<
r
>
{};
}
template
<
typename
X
,
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
template
<
typename
X
,
typename
...
Ys
,
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
gcd
(
X
x
,
Ys
...
ys
)
{
return
gcd
(
x
,
gcd
(
ys
...));
...
...
@@ -199,9 +198,7 @@ __host__ __device__ constexpr auto lcm(X x, Y y)
return
(
x
*
y
)
/
gcd
(
x
,
y
);
}
template
<
typename
X
,
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
template
<
typename
X
,
typename
...
Ys
,
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
lcm
(
X
x
,
Ys
...
ys
)
{
return
lcm
(
x
,
lcm
(
ys
...));
...
...
composable_kernel/include/utility/tuple.hpp
View file @
16effa76
...
...
@@ -4,6 +4,7 @@
#include "integral_constant.hpp"
#include "sequence.hpp"
#include "type.hpp"
#include "enable_if.hpp"
namespace
ck
{
...
...
@@ -20,9 +21,8 @@ struct TupleElement
{
__host__
__device__
constexpr
TupleElement
()
=
default
;
template
<
typename
T
,
typename
std
::
enable_if
<!
is_same
<
remove_reference_t
<
remove_cv_t
<
T
>
>
,
TupleElement
>::
value
,
template
<
typename
T
,
typename
enable_if
<!
is_same
<
remove_reference_t
<
remove_cv_t
<
T
>
>
,
TupleElement
>::
value
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
TupleElement
(
T
&&
v
)
:
mData
(
std
::
forward
<
T
>
(
v
))
{
...
...
@@ -58,9 +58,8 @@ struct TupleImpl<Sequence<Is...>, Xs...> : TupleElement<TupleElementKey<Is>, Xs>
{
__host__
__device__
constexpr
TupleImpl
()
=
default
;
template
<
typename
Y
,
typename
std
::
enable_if
<
sizeof
...(
Is
)
==
1
&&
sizeof
...(
Xs
)
==
1
&&
template
<
typename
Y
,
typename
enable_if
<
sizeof
...(
Is
)
==
1
&&
sizeof
...(
Xs
)
==
1
&&
!
is_same
<
remove_reference_t
<
remove_cv_t
<
Y
>
>
,
TupleImpl
>::
value
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
TupleImpl
(
Y
&&
y
)
...
...
@@ -68,7 +67,7 @@ struct TupleImpl<Sequence<Is...>, Xs...> : TupleElement<TupleElementKey<Is>, Xs>
{
}
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
template
<
typename
...
Ys
,
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
TupleImpl
(
Ys
&&
...
ys
)
:
TupleElement
<
TupleElementKey
<
Is
>
,
Xs
>
(
std
::
forward
<
Ys
>
(
ys
))...
{
...
...
@@ -102,16 +101,16 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__
__device__
constexpr
Tuple
()
=
default
;
template
<
typename
Y
,
typename
std
::
enable_if
<
sizeof
...(
Xs
)
==
1
&&
!
is_same
<
remove_reference_t
<
remove_cv_t
<
Y
>
>
,
Tuple
>::
value
,
typename
enable_if
<
sizeof
...(
Xs
)
==
1
&&
!
is_same
<
remove_reference_t
<
remove_cv_t
<
Y
>
>
,
Tuple
>::
value
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
Tuple
(
Y
&&
y
)
:
base
(
std
::
forward
<
Y
>
(
y
))
{
}
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
)
&&
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
typename
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
)
&&
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
Tuple
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
...
...
composable_kernel/include/utility/type.hpp
View file @
16effa76
...
...
@@ -2,6 +2,7 @@
#define CK_TYPE_HPP
#include "integral_constant.hpp"
#include "enable_if.hpp"
namespace
ck
{
...
...
@@ -39,9 +40,7 @@ struct is_known_at_compile_time<integral_constant<T, X>>
static
constexpr
bool
value
=
true
;
};
template
<
typename
Y
,
typename
X
,
typename
std
::
enable_if
<
sizeof
(
X
)
==
sizeof
(
Y
),
bool
>
::
type
=
false
>
template
<
typename
Y
,
typename
X
,
typename
enable_if
<
sizeof
(
X
)
==
sizeof
(
Y
),
bool
>
::
type
=
false
>
__host__
__device__
constexpr
Y
as_type
(
X
x
)
{
union
AsType
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment