"git@developer.sourcefind.cn:cnjsdfcy/simbricks.git" did not exist on "5fccfd876e2cbe6ed09500c6abc9f40427a47751"
Commit a0c3eb51 authored by Po-Yen, Chen's avatar Po-Yen, Chen
Browse files

Merge branch 'feature/support-readfirstlane-for-object-types' into...

Merge branch 'feature/support-readfirstlane-for-object-types' into feature/simplify-karg-for-device-gemm-xdl-improved
parents 6fde8c86 232972e4
...@@ -40,7 +40,7 @@ using get_unsigned_int_t = typename get_unsigned_int<Size>::type; ...@@ -40,7 +40,7 @@ using get_unsigned_int_t = typename get_unsigned_int<Size>::type;
} // namespace detail } // namespace detail
__device__ inline int32_t readfirstlane(int32_t value) __device__ inline int32_t amd_wave_read_first_lane(int32_t value)
{ {
return __builtin_amdgcn_readfirstlane(value); return __builtin_amdgcn_readfirstlane(value);
} }
...@@ -48,7 +48,7 @@ __device__ inline int32_t readfirstlane(int32_t value) ...@@ -48,7 +48,7 @@ __device__ inline int32_t readfirstlane(int32_t value)
template < template <
typename Object, typename Object,
typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>> typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto readfirstlane(const Object& obj) __device__ auto amd_wave_read_first_lane(const Object& obj)
{ {
using Size = unsigned; using Size = unsigned;
constexpr Size SgprSize = 4; constexpr Size SgprSize = 4;
...@@ -65,7 +65,7 @@ __device__ auto readfirstlane(const Object& obj) ...@@ -65,7 +65,7 @@ __device__ auto readfirstlane(const Object& obj)
for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize) for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize)
{ {
*reinterpret_cast<Sgpr*>(to_obj + offset) = *reinterpret_cast<Sgpr*>(to_obj + offset) =
readfirstlane(*reinterpret_cast<const Sgpr*>(from_obj + offset)); amd_wave_read_first_lane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
} }
if constexpr(0 < RemainedSize) if constexpr(0 < RemainedSize)
...@@ -73,7 +73,7 @@ __device__ auto readfirstlane(const Object& obj) ...@@ -73,7 +73,7 @@ __device__ auto readfirstlane(const Object& obj)
using Carrier = detail::get_unsigned_int_t<RemainedSize>; using Carrier = detail::get_unsigned_int_t<RemainedSize>;
*reinterpret_cast<Carrier>(to_obj + CompleteSgprCopyBoundary) = *reinterpret_cast<Carrier>(to_obj + CompleteSgprCopyBoundary) =
readfirstlane(*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary)); amd_wave_read_first_lane(*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
} }
/// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this /// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "ck/utility/debug.hpp" #include "ck/utility/debug.hpp"
#include "ck/utility/amd_buffer_addressing.hpp" #include "ck/utility/amd_buffer_addressing.hpp"
#include "ck/utility/amd_wave_read_first_lane.hpp"
#include "ck/utility/generic_memory_space_atomic.hpp" #include "ck/utility/generic_memory_space_atomic.hpp"
#include "ck/utility/get_id.hpp" #include "ck/utility/get_id.hpp"
#include "ck/utility/thread_group.hpp" #include "ck/utility/thread_group.hpp"
...@@ -40,7 +41,6 @@ ...@@ -40,7 +41,6 @@
#include "ck/utility/amd_address_space.hpp" #include "ck/utility/amd_address_space.hpp"
#include "ck/utility/static_buffer.hpp" #include "ck/utility/static_buffer.hpp"
#include "ck/utility/dynamic_buffer.hpp" #include "ck/utility/dynamic_buffer.hpp"
#include "ck/utility/readfirstlane.hpp"
// TODO: remove this // TODO: remove this
#if CK_USE_AMD_INLINE_ASM #if CK_USE_AMD_INLINE_ASM
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment