Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
253f942b
Commit
253f942b
authored
Sep 22, 2023
by
Umang Yadav
Browse files
changes to make it compile
parent
8f9c0243
Changes
275
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
179 additions
and
94 deletions
+179
-94
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
+5
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
+5
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp
...ration/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp
+5
-0
include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp
...ude/ck/tensor_operation/gpu/thread/threadwise_welford.hpp
+5
-0
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
+5
-0
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
+5
-0
include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp
...tion/operator_transform/transform_contraction_to_gemm.hpp
+5
-0
include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp
...operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp
+5
-0
include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp
...eration/operator_transform/transform_conv_fwd_to_gemm.hpp
+5
-0
include/ck/utility/amd_address_space.hpp
include/ck/utility/amd_address_space.hpp
+5
-0
include/ck/utility/amd_buffer_addressing.hpp
include/ck/utility/amd_buffer_addressing.hpp
+5
-0
include/ck/utility/amd_inline_asm.hpp
include/ck/utility/amd_inline_asm.hpp
+5
-0
include/ck/utility/amd_wave_read_first_lane.hpp
include/ck/utility/amd_wave_read_first_lane.hpp
+84
-94
include/ck/utility/amd_wmma.hpp
include/ck/utility/amd_wmma.hpp
+5
-0
include/ck/utility/amd_xdlops.hpp
include/ck/utility/amd_xdlops.hpp
+5
-0
include/ck/utility/array.hpp
include/ck/utility/array.hpp
+5
-0
include/ck/utility/array_multi_index.hpp
include/ck/utility/array_multi_index.hpp
+5
-0
include/ck/utility/c_style_pointer_cast.hpp
include/ck/utility/c_style_pointer_cast.hpp
+5
-0
include/ck/utility/common_header.hpp
include/ck/utility/common_header.hpp
+5
-0
include/ck/utility/container_element_picker.hpp
include/ck/utility/container_element_picker.hpp
+5
-0
No files found.
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -258,3 +261,5 @@ struct ThreadwiseTensorSliceTransfer_v6r2
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -308,3 +311,5 @@ struct ThreadwiseTensorSliceTransfer_v6r3
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -296,3 +299,5 @@ struct ThreadwiseTensorSliceTransfer_v7
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -135,3 +138,5 @@ struct ThreadwiseWelfordMerge
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -505,3 +508,5 @@ struct WmmaGemm
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -965,3 +968,5 @@ struct XdlopsGemm
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -286,3 +289,5 @@ struct TransformBatchedContractionContractionToBatchedGemmGemm
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -1132,3 +1135,5 @@ struct TransformConvBwdDataToGemm_v1
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -878,3 +881,5 @@ struct TransformConvFwdToGemm
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/amd_address_space.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -43,3 +46,5 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres
}
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/amd_buffer_addressing.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -1312,3 +1315,5 @@ amd_buffer_atomic_max(const typename vector_type_maker<T, N>::type::type src_thr
}
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/amd_inline_asm.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -369,3 +372,5 @@ __device__ void amd_assembly_wmma_f32_16x16x16_f16_w32(half16_t a, half16_t b, f
}
// namespace ck
#endif
#pragma clang diagnostic pop
include/ck/utility/amd_wave_read_first_lane.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -7,87 +10,75 @@
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"
#ifndef __HIPCC_RTC__
#include <array>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#endif
namespace
ck
{
namespace
detail
{
template
<
unsigned
SizeInBytes
>
struct
get_carrier
;
template
<
unsigned
SizeInBytes
>
struct
get_carrier
;
template
<
>
struct
get_carrier
<
1
>
{
using
type
=
uint8_t
;
template
<
>
struct
get_carrier
<
1
>
{
using
type
=
uint8_t
;
};
template
<
>
struct
get_carrier
<
2
>
{
using
type
=
uint16_t
;
template
<
>
struct
get_carrier
<
2
>
{
using
type
=
uint16_t
;
};
template
<
>
struct
get_carrier
<
3
>
{
using
type
=
class
carrier
{
using
value_type
=
uint32_t
;
std
::
array
<
std
::
byte
,
3
>
bytes
;
static_assert
(
sizeof
(
bytes
)
<=
sizeof
(
value_type
));
// replacement of host std::copy_n()
template
<
typename
InputIterator
,
typename
Size
,
typename
OutputIterator
>
__device__
static
OutputIterator
copy_n
(
InputIterator
from
,
Size
size
,
OutputIterator
to
)
{
if
(
0
<
size
)
{
*
to
=
*
from
;
++
to
;
for
(
Size
count
=
1
;
count
<
size
;
++
count
)
{
*
to
=
*++
from
;
++
to
;
}
}
return
to
;
template
<
>
struct
get_carrier
<
3
>
{
using
type
=
class
carrier
{
using
value_type
=
uint32_t
;
// std::array<std::byte, 3> bytes;
std
::
byte
bytes
[
3
];
static_assert
(
sizeof
(
bytes
)
<=
sizeof
(
value_type
));
// replacement of host std::copy_n()
template
<
typename
InputIterator
,
typename
Size
,
typename
OutputIterator
>
__device__
static
OutputIterator
copy_n
(
InputIterator
from
,
Size
size
,
OutputIterator
to
)
{
if
(
0
<
size
)
{
*
to
=
*
from
;
++
to
;
for
(
Size
count
=
1
;
count
<
size
;
++
count
)
{
*
to
=
*++
from
;
++
to
;
}
}
// method to trigger template substitution failure
__device__
carrier
(
const
carrier
&
other
)
noexcept
{
copy_n
(
other
.
bytes
.
begin
(),
bytes
.
size
(),
bytes
.
begin
());
}
return
to
;
}
public:
__device__
carrier
&
operator
=
(
value_type
value
)
noexcept
{
copy_n
(
reinterpret_cast
<
const
std
::
byte
*>
(
&
value
),
bytes
.
size
(),
bytes
.
begin
());
// method to trigger template substitution failure
__device__
carrier
(
const
carrier
&
other
)
noexcept
{
copy_n
(
&
other
.
bytes
[
0
],
3
,
&
bytes
[
0
]);
}
return
*
this
;
}
public:
__device__
carrier
&
operator
=
(
value_type
value
)
noexcept
{
copy_n
(
reinterpret_cast
<
const
std
::
byte
*>
(
&
value
),
3
,
&
bytes
[
0
]);
__device__
operator
value_type
()
const
noexcept
{
std
::
byte
result
[
sizeof
(
value_type
)];
return
*
this
;
}
copy_n
(
bytes
.
begin
(),
bytes
.
size
(),
result
);
__device__
operator
value_type
()
const
noexcept
{
std
::
byte
result
[
sizeof
(
value_type
)];
return
*
reinterpret_cast
<
const
value_type
*>
(
result
);
}
};
copy_n
(
&
bytes
[
0
],
3
,
result
);
return
*
reinterpret_cast
<
const
value_type
*>
(
result
);
}
};
};
static_assert
(
sizeof
(
get_carrier
<
3
>::
type
)
==
3
);
template
<
>
struct
get_carrier
<
4
>
{
using
type
=
uint32_t
;
template
<
>
struct
get_carrier
<
4
>
{
using
type
=
uint32_t
;
};
template
<
unsigned
SizeInBytes
>
...
...
@@ -95,44 +86,43 @@ using get_carrier_t = typename get_carrier<SizeInBytes>::type;
}
// namespace detail
__device__
inline
int32_t
amd_wave_read_first_lane
(
int32_t
value
)
{
return
__builtin_amdgcn_readfirstlane
(
value
);
__device__
inline
int32_t
amd_wave_read_first_lane
(
int32_t
value
)
{
return
__builtin_amdgcn_readfirstlane
(
value
);
}
template
<
typename
Object
,
typename
=
std
::
enable_if_t
<
std
::
is_class_v
<
Object
>
&&
std
::
is_trivially_copyable_v
<
Object
>>>
__device__
auto
amd_wave_read_first_lane
(
const
Object
&
obj
)
{
using
Size
=
unsigned
;
constexpr
Size
SgprSize
=
4
;
constexpr
Size
ObjectSize
=
sizeof
(
Object
);
auto
*
const
from_obj
=
reinterpret_cast
<
const
std
::
byte
*>
(
&
obj
);
alignas
(
Object
)
std
::
byte
to_obj
[
ObjectSize
];
constexpr
Size
RemainedSize
=
ObjectSize
%
SgprSize
;
constexpr
Size
CompleteSgprCopyBoundary
=
ObjectSize
-
RemainedSize
;
for
(
Size
offset
=
0
;
offset
<
CompleteSgprCopyBoundary
;
offset
+=
SgprSize
)
{
using
Sgpr
=
detail
::
get_carrier_t
<
SgprSize
>
;
*
reinterpret_cast
<
Sgpr
*>
(
to_obj
+
offset
)
=
amd_wave_read_first_lane
(
*
reinterpret_cast
<
const
Sgpr
*>
(
from_obj
+
offset
));
}
if
constexpr
(
0
<
RemainedSize
)
{
using
Carrier
=
detail
::
get_carrier_t
<
RemainedSize
>
;
*
reinterpret_cast
<
Carrier
*>
(
to_obj
+
CompleteSgprCopyBoundary
)
=
amd_wave_read_first_lane
(
*
reinterpret_cast
<
const
Carrier
*>
(
from_obj
+
CompleteSgprCopyBoundary
));
}
/// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this
/// scenario
return
*
reinterpret_cast
<
Object
*>
(
to_obj
);
template
<
typename
Object
,
typename
=
std
::
enable_if_t
<
std
::
is_class
<
Object
>
::
value
&&
std
::
is_trivially_copyable
<
Object
>::
value
>>
__device__
auto
amd_wave_read_first_lane
(
const
Object
&
obj
)
{
using
Size
=
unsigned
;
constexpr
Size
SgprSize
=
4
;
constexpr
Size
ObjectSize
=
sizeof
(
Object
);
auto
*
const
from_obj
=
reinterpret_cast
<
const
std
::
byte
*>
(
&
obj
);
alignas
(
Object
)
std
::
byte
to_obj
[
ObjectSize
];
constexpr
Size
RemainedSize
=
ObjectSize
%
SgprSize
;
constexpr
Size
CompleteSgprCopyBoundary
=
ObjectSize
-
RemainedSize
;
for
(
Size
offset
=
0
;
offset
<
CompleteSgprCopyBoundary
;
offset
+=
SgprSize
)
{
using
Sgpr
=
detail
::
get_carrier_t
<
SgprSize
>
;
*
reinterpret_cast
<
Sgpr
*>
(
to_obj
+
offset
)
=
amd_wave_read_first_lane
(
*
reinterpret_cast
<
const
Sgpr
*>
(
from_obj
+
offset
));
}
if
constexpr
(
0
<
RemainedSize
)
{
using
Carrier
=
detail
::
get_carrier_t
<
RemainedSize
>
;
*
reinterpret_cast
<
Carrier
*>
(
to_obj
+
CompleteSgprCopyBoundary
)
=
amd_wave_read_first_lane
(
*
reinterpret_cast
<
const
Carrier
*>
(
from_obj
+
CompleteSgprCopyBoundary
));
}
/// NOTE: Implicitly start object lifetime. It's better to use
/// std::start_lifetime_at() in this scenario
return
*
reinterpret_cast
<
Object
*>
(
to_obj
);
}
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/amd_wmma.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -256,3 +259,5 @@ struct intrin_wmma_i32_16x16x16_iu8_w64<16, 16, neg_a, neg_b, clamp>
}
// namespace ck
#endif
#pragma clang diagnostic pop
include/ck/utility/amd_xdlops.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -419,3 +422,5 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16>
};
}
// namespace ck
#endif
#pragma clang diagnostic pop
include/ck/utility/array.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -64,3 +67,5 @@ __host__ __device__ constexpr auto make_array()
}
// namespace ck
#endif
#pragma clang diagnostic pop
include/ck/utility/array_multi_index.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -78,3 +81,5 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
}
// namespace ck
#endif
#pragma clang diagnostic pop
include/ck/utility/c_style_pointer_cast.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -23,3 +26,5 @@ __host__ __device__ PY c_style_pointer_cast(PX p_x)
}
// namespace ck
#endif
#pragma clang diagnostic pop
include/ck/utility/common_header.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -51,3 +54,5 @@
#ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp"
#endif
#pragma clang diagnostic pop
include/ck/utility/container_element_picker.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -156,3 +159,5 @@ __host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks)
}
// namespace ck
#endif
#pragma clang diagnostic pop
Prev
1
…
8
9
10
11
12
13
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment