Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
fa9da1a4
Commit
fa9da1a4
authored
Jun 19, 2023
by
Jun Liu
Browse files
Merge branch 'amd-develop' into amd-master
parents
4c105089
457308e3
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
174 additions
and
40 deletions
+174
-40
include/ck/utility/amd_inline_asm.hpp
include/ck/utility/amd_inline_asm.hpp
+1
-1
include/ck/utility/amd_llvm_intrinsic.hpp
include/ck/utility/amd_llvm_intrinsic.hpp
+0
-14
include/ck/utility/amd_wave_read_first_lane.hpp
include/ck/utility/amd_wave_read_first_lane.hpp
+138
-0
include/ck/utility/amd_wmma.hpp
include/ck/utility/amd_wmma.hpp
+1
-1
include/ck/utility/amd_xdlops.hpp
include/ck/utility/amd_xdlops.hpp
+2
-2
include/ck/utility/array.hpp
include/ck/utility/array.hpp
+1
-1
include/ck/utility/array_multi_index.hpp
include/ck/utility/array_multi_index.hpp
+1
-1
include/ck/utility/c_style_pointer_cast.hpp
include/ck/utility/c_style_pointer_cast.hpp
+1
-1
include/ck/utility/common_header.hpp
include/ck/utility/common_header.hpp
+2
-1
include/ck/utility/container_element_picker.hpp
include/ck/utility/container_element_picker.hpp
+1
-1
include/ck/utility/container_helper.hpp
include/ck/utility/container_helper.hpp
+1
-1
include/ck/utility/debug.hpp
include/ck/utility/debug.hpp
+1
-1
include/ck/utility/dynamic_buffer.hpp
include/ck/utility/dynamic_buffer.hpp
+17
-8
include/ck/utility/enable_if.hpp
include/ck/utility/enable_if.hpp
+1
-1
include/ck/utility/functional.hpp
include/ck/utility/functional.hpp
+1
-1
include/ck/utility/functional2.hpp
include/ck/utility/functional2.hpp
+1
-1
include/ck/utility/functional3.hpp
include/ck/utility/functional3.hpp
+1
-1
include/ck/utility/functional4.hpp
include/ck/utility/functional4.hpp
+1
-1
include/ck/utility/generic_memory_space_atomic.hpp
include/ck/utility/generic_memory_space_atomic.hpp
+1
-1
include/ck/utility/get_id.hpp
include/ck/utility/get_id.hpp
+1
-1
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
include/ck/utility/amd_inline_asm.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP
...
...
include/ck/utility/amd_llvm_intrinsic.hpp
deleted
100644 → 0
View file @
4c105089
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_LLVM_INTRINSIC_HPP
#define CK_AMD_LLVM_INTRINSIC_HPP
#include "data_type.hpp"
namespace
ck
{
__device__
int32_t
llvm_amdgcn_readfirstlane_i32
(
int32_t
i
)
__asm
(
"llvm.amdgcn.readfirstlane"
);
}
// namespace ck
#endif
include/ck/utility/amd_wave_read_first_lane.hpp
0 → 100644
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/ck.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <type_traits>
namespace
ck
{
namespace
detail
{
template
<
unsigned
SizeInBytes
>
struct
get_carrier
;
template
<
>
struct
get_carrier
<
1
>
{
using
type
=
uint8_t
;
};
template
<
>
struct
get_carrier
<
2
>
{
using
type
=
uint16_t
;
};
template
<
>
struct
get_carrier
<
3
>
{
using
type
=
class
carrier
{
using
value_type
=
uint32_t
;
std
::
array
<
std
::
byte
,
3
>
bytes
;
static_assert
(
sizeof
(
bytes
)
<=
sizeof
(
value_type
));
// replacement of host std::copy_n()
template
<
typename
InputIterator
,
typename
Size
,
typename
OutputIterator
>
__device__
static
OutputIterator
copy_n
(
InputIterator
from
,
Size
size
,
OutputIterator
to
)
{
if
(
0
<
size
)
{
*
to
=
*
from
;
++
to
;
for
(
Size
count
=
1
;
count
<
size
;
++
count
)
{
*
to
=
*++
from
;
++
to
;
}
}
return
to
;
}
// method to trigger template substitution failure
__device__
carrier
(
const
carrier
&
other
)
noexcept
{
copy_n
(
other
.
bytes
.
begin
(),
bytes
.
size
(),
bytes
.
begin
());
}
public:
__device__
carrier
&
operator
=
(
value_type
value
)
noexcept
{
copy_n
(
reinterpret_cast
<
const
std
::
byte
*>
(
&
value
),
bytes
.
size
(),
bytes
.
begin
());
return
*
this
;
}
__device__
operator
value_type
()
const
noexcept
{
std
::
byte
result
[
sizeof
(
value_type
)];
copy_n
(
bytes
.
begin
(),
bytes
.
size
(),
result
);
return
*
reinterpret_cast
<
const
value_type
*>
(
result
);
}
};
};
static_assert
(
sizeof
(
get_carrier
<
3
>::
type
)
==
3
);
template
<
>
struct
get_carrier
<
4
>
{
using
type
=
uint32_t
;
};
template
<
unsigned
SizeInBytes
>
using
get_carrier_t
=
typename
get_carrier
<
SizeInBytes
>::
type
;
}
// namespace detail
__device__
inline
int32_t
amd_wave_read_first_lane
(
int32_t
value
)
{
return
__builtin_amdgcn_readfirstlane
(
value
);
}
template
<
typename
Object
,
typename
=
std
::
enable_if_t
<
std
::
is_class_v
<
Object
>
&&
std
::
is_trivially_copyable_v
<
Object
>>>
__device__
auto
amd_wave_read_first_lane
(
const
Object
&
obj
)
{
using
Size
=
unsigned
;
constexpr
Size
SgprSize
=
4
;
constexpr
Size
ObjectSize
=
sizeof
(
Object
);
auto
*
const
from_obj
=
reinterpret_cast
<
const
std
::
byte
*>
(
&
obj
);
alignas
(
Object
)
std
::
byte
to_obj
[
ObjectSize
];
constexpr
Size
RemainedSize
=
ObjectSize
%
SgprSize
;
constexpr
Size
CompleteSgprCopyBoundary
=
ObjectSize
-
RemainedSize
;
for
(
Size
offset
=
0
;
offset
<
CompleteSgprCopyBoundary
;
offset
+=
SgprSize
)
{
using
Sgpr
=
detail
::
get_carrier_t
<
SgprSize
>
;
*
reinterpret_cast
<
Sgpr
*>
(
to_obj
+
offset
)
=
amd_wave_read_first_lane
(
*
reinterpret_cast
<
const
Sgpr
*>
(
from_obj
+
offset
));
}
if
constexpr
(
0
<
RemainedSize
)
{
using
Carrier
=
detail
::
get_carrier_t
<
RemainedSize
>
;
*
reinterpret_cast
<
Carrier
*>
(
to_obj
+
CompleteSgprCopyBoundary
)
=
amd_wave_read_first_lane
(
*
reinterpret_cast
<
const
Carrier
*>
(
from_obj
+
CompleteSgprCopyBoundary
));
}
/// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this
/// scenario
return
*
reinterpret_cast
<
Object
*>
(
to_obj
);
}
}
// namespace ck
include/ck/utility/amd_wmma.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_WMMA_HPP
#define CK_AMD_WMMA_HPP
...
...
include/ck/utility/amd_xdlops.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_XDLOPS_HPP
#define CK_AMD_XDLOPS_HPP
...
...
@@ -344,7 +344,7 @@ struct intrin_mfma_f64_16x16x4f64<16, 16>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
double
&
reg_a
,
const
double
&
reg_b
,
FloatC
&
reg_c
)
{
#if defined(__gfx90a__) || defined(__gfx940__)
#if defined(__gfx90a__) || defined(__gfx940__)
|| defined(__gfx941__) || defined(__gfx942__)
reg_c
.
template
AsType
<
double4_t
>()(
Number
<
0
>
{})
=
__builtin_amdgcn_mfma_f64_16x16x4f64
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
double4_t
>()[
Number
<
0
>
{}],
0
,
0
,
0
);
#else
...
...
include/ck/utility/array.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_ARRAY_HPP
#define CK_ARRAY_HPP
...
...
include/ck/utility/array_multi_index.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_ARRAY_MULTI_INDEX_HPP
#define CK_ARRAY_MULTI_INDEX_HPP
...
...
include/ck/utility/c_style_pointer_cast.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_C_STYLE_POINTER_CAST_HPP
#define CK_C_STYLE_POINTER_CAST_HPP
...
...
include/ck/utility/common_header.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -33,6 +33,7 @@
#include "ck/utility/debug.hpp"
#include "ck/utility/amd_buffer_addressing.hpp"
#include "ck/utility/amd_wave_read_first_lane.hpp"
#include "ck/utility/generic_memory_space_atomic.hpp"
#include "ck/utility/get_id.hpp"
#include "ck/utility/thread_group.hpp"
...
...
include/ck/utility/container_element_picker.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CONTAINER_ELEMENT_PICKER_HPP
#define CK_CONTAINER_ELEMENT_PICKER_HPP
...
...
include/ck/utility/container_helper.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CONTAINER_HELPER_HPP
#define CK_CONTAINER_HELPER_HPP
...
...
include/ck/utility/debug.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef UTILITY_DEBUG_HPP
#define UTILITY_DEBUG_HPP
...
...
include/ck/utility/dynamic_buffer.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -19,7 +19,8 @@ namespace ck {
template
<
AddressSpaceEnum
BufferAddressSpace
,
typename
T
,
typename
ElementSpaceSize
,
bool
InvalidElementUseNumericalZeroValue
>
bool
InvalidElementUseNumericalZeroValue
,
AmdBufferCoherenceEnum
coherence
=
AmdBufferCoherenceEnum
::
DefaultCoherence
>
struct
DynamicBuffer
{
using
type
=
T
;
...
...
@@ -77,13 +78,16 @@ struct DynamicBuffer
if
constexpr
(
InvalidElementUseNumericalZeroValue
)
{
return
amd_buffer_load_invalid_element_return_zero
<
remove_cvref_t
<
T
>
,
t_per_x
>
(
return
amd_buffer_load_invalid_element_return_zero
<
remove_cvref_t
<
T
>
,
t_per_x
,
coherence
>
(
p_data_
,
i
,
is_valid_element
,
element_space_size_
);
}
else
{
return
amd_buffer_load_invalid_element_return_customized_value
<
remove_cvref_t
<
T
>
,
t_per_x
>
(
t_per_x
,
coherence
>
(
p_data_
,
i
,
is_valid_element
,
element_space_size_
,
invalid_element_value_
);
}
}
...
...
@@ -173,7 +177,7 @@ struct DynamicBuffer
{
constexpr
index_t
t_per_x
=
scalar_per_x_vector
/
scalar_per_t_vector
;
amd_buffer_store
<
remove_cvref_t
<
T
>
,
t_per_x
>
(
amd_buffer_store
<
remove_cvref_t
<
T
>
,
t_per_x
,
coherence
>
(
x
,
p_data_
,
i
,
is_valid_element
,
element_space_size_
);
}
else
if
constexpr
(
GetAddressSpace
()
==
AddressSpaceEnum
::
Lds
&&
...
...
@@ -376,14 +380,19 @@ struct DynamicBuffer
__host__
__device__
static
constexpr
bool
IsDynamicBuffer
()
{
return
true
;
}
};
template
<
AddressSpaceEnum
BufferAddressSpace
,
typename
T
,
typename
ElementSpaceSize
>
template
<
AddressSpaceEnum
BufferAddressSpace
,
AmdBufferCoherenceEnum
coherence
=
AmdBufferCoherenceEnum
::
DefaultCoherence
,
typename
T
,
typename
ElementSpaceSize
>
__host__
__device__
constexpr
auto
make_dynamic_buffer
(
T
*
p
,
ElementSpaceSize
element_space_size
)
{
return
DynamicBuffer
<
BufferAddressSpace
,
T
,
ElementSpaceSize
,
true
>
{
p
,
element_space_size
};
return
DynamicBuffer
<
BufferAddressSpace
,
T
,
ElementSpaceSize
,
true
,
coherence
>
{
p
,
element_space_size
};
}
template
<
AddressSpaceEnum
BufferAddressSpace
,
AmdBufferCoherenceEnum
coherence
=
AmdBufferCoherenceEnum
::
DefaultCoherence
,
typename
T
,
typename
ElementSpaceSize
,
typename
X
,
...
...
@@ -391,7 +400,7 @@ template <
__host__
__device__
constexpr
auto
make_dynamic_buffer
(
T
*
p
,
ElementSpaceSize
element_space_size
,
X
invalid_element_value
)
{
return
DynamicBuffer
<
BufferAddressSpace
,
T
,
ElementSpaceSize
,
false
>
{
return
DynamicBuffer
<
BufferAddressSpace
,
T
,
ElementSpaceSize
,
false
,
coherence
>
{
p
,
element_space_size
,
invalid_element_value
};
}
...
...
include/ck/utility/enable_if.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
include/ck/utility/functional.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
include/ck/utility/functional2.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
include/ck/utility/functional3.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
include/ck/utility/functional4.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_FUNCTIONAL4_HPP
#define CK_FUNCTIONAL4_HPP
...
...
include/ck/utility/generic_memory_space_atomic.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "data_type.hpp"
...
...
include/ck/utility/get_id.hpp
View file @
fa9da1a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
Prev
1
…
23
24
25
26
27
28
29
30
31
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment