Commit 1fc208c8 authored by Po-Yen, Chen's avatar Po-Yen, Chen
Browse files

Copy object itself in readfirstlane()

parent e4c55024
...@@ -37,56 +37,43 @@ struct get_signed_int<4> ...@@ -37,56 +37,43 @@ struct get_signed_int<4>
template <std::size_t Size> template <std::size_t Size>
using get_signed_int_t = typename get_signed_int<Size>::type; using get_signed_int_t = typename get_signed_int<Size>::type;
template <typename Object> } // namespace detail
struct sgpr_ptr
__device__ std::int32_t readfirstlane(std::int32_t value)
{ {
static_assert(!std::is_const_v<Object> && !std::is_reference_v<Object> && return __builtin_amdgcn_readfirstlane(value);
std::is_trivially_copyable_v<Object>); }
template <
typename Object,
typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto readfirstlane(const Object& obj)
{
static constexpr std::size_t SgprSize = 4; static constexpr std::size_t SgprSize = 4;
static constexpr std::size_t ObjectSize = sizeof(Object); static constexpr std::size_t ObjectSize = sizeof(Object);
using Sgpr = get_signed_int_t<SgprSize>; using Sgpr = detail::get_signed_int_t<SgprSize>;
alignas(Object) unsigned char memory[ObjectSize];
__device__ explicit sgpr_ptr(const Object& obj) noexcept
{
const auto* from = reinterpret_cast<const unsigned char*>(&obj); const auto* from = reinterpret_cast<const unsigned char*>(&obj);
static_for<0, ObjectSize, SgprSize>{}([&](auto offset) { static_for<0, ObjectSize, SgprSize>{}([&](auto offset) {
*reinterpret_cast<Sgpr*>(memory + offset) = *reinterpret_cast<Sgpr*>(memory + offset) =
__builtin_amdgcn_readfirstlane(*reinterpret_cast<const Sgpr*>(from + offset)); readfirstlane(*reinterpret_cast<const Sgpr*>(from + offset));
}); });
constexpr std::size_t RemainedSize = ObjectSize % SgprSize; static constexpr std::size_t RemainedSize = ObjectSize % SgprSize;
if constexpr(0 < RemainedSize) if constexpr(0 < RemainedSize)
{ {
using Carrier = get_signed_int_t<RemainedSize>; using Carrier = detail::get_signed_int_t<RemainedSize>;
constexpr std::size_t offset = constexpr std::size_t offset = SgprSize * math::integer_divide_floor(ObjectSize, SgprSize);
SgprSize * math::integer_divide_floor(ObjectSize, SgprSize);
*reinterpret_cast<Carrier>(memory + offset) = *reinterpret_cast<Carrier>(memory + offset) =
__builtin_amdgcn_readfirstlane(*reinterpret_cast<const Carrier*>(from + offset)); readfirstlane(*reinterpret_cast<const Carrier*>(from + offset));
} }
}
__device__ Object& operator*() { return *(this->operator->()); }
__device__ const Object& operator*() const { return *(this->operator->()); } return *reinterpret_cast<Object*>(memory);
__device__ Object* operator->() { return reinterpret_cast<Object*>(memory); }
__device__ const Object* operator->() const { return reinterpret_cast<const Object*>(memory); }
private:
alignas(
Object) unsigned char memory[SgprSize * math::integer_divide_ceil(ObjectSize, SgprSize)];
};
} // namespace detail
template <typename T>
__device__ constexpr auto readfirstlane(const T& obj)
{
return detail::sgpr_ptr<T>(obj);
} }
} // namespace ck } // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment