// SPDX-License-Identifier: MIT // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. #pragma once #include "ck/ck.hpp" #include "ck/utility/functional2.hpp" #include "ck/utility/math.hpp" #include #include #include namespace ck { namespace detail { template struct get_unsigned_int; template <> struct get_unsigned_int<1> { using type = std::uint8_t; }; template <> struct get_unsigned_int<2> { using type = std::uint16_t; }; template <> struct get_unsigned_int<4> { using type = std::uint32_t; }; template using get_unsigned_int_t = typename get_unsigned_int::type; } // namespace detail __device__ inline std::int32_t readfirstlane(std::int32_t value) { return __builtin_amdgcn_readfirstlane(value); } template < typename Object, typename = std::enable_if_t && std::is_trivially_copyable_v>> __device__ auto readfirstlane(const Object& obj) { constexpr std::size_t SgprSize = 4; constexpr std::size_t ObjectSize = sizeof(Object); using Sgpr = detail::get_unsigned_int_t; alignas(Object) std::byte to_obj[ObjectSize]; auto* const from_obj = reinterpret_cast(&obj); constexpr std::size_t RemainedSize = ObjectSize % SgprSize; constexpr std::size_t CompleteSgprCopyBoundary = ObjectSize - RemainedSize; static_for<0, CompleteSgprCopyBoundary, SgprSize>{}([&](auto offset) { *reinterpret_cast(to_obj + offset) = readfirstlane(*reinterpret_cast(from_obj + offset)); }); if constexpr(0 < RemainedSize) { using Carrier = detail::get_unsigned_int_t; *reinterpret_cast(to_obj + CompleteSgprCopyBoundary) = readfirstlane(*reinterpret_cast(from_obj + CompleteSgprCopyBoundary)); } /// NOTE: Implicitly start object lifetime. It's better to use /// std::start_lifetime_at() in this scenario return *reinterpret_cast(to_obj); } } // namespace ck