readfirstlane.hpp 2.09 KB
Newer Older
1
2
3
4
5
6
7
8
9
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/ck.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"

10
#include <cstddef>
11
12
13
14
15
16
17
#include <cstdint>
#include <type_traits>

namespace ck {
namespace detail {

template <std::size_t Size>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
18
struct get_unsigned_int;
19
20

template <>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
21
struct get_unsigned_int<1>
22
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
23
    using type = std::uint8_t;
24
25
26
};

template <>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
27
struct get_unsigned_int<2>
28
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
29
    using type = std::uint16_t;
30
31
32
};

template <>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
33
struct get_unsigned_int<4>
34
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
35
    using type = std::uint32_t;
36
37
38
};

template <std::size_t Size>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
39
using get_unsigned_int_t = typename get_unsigned_int<Size>::type;
40
41
42

} // namespace detail

43
__device__ inline std::int32_t readfirstlane(std::int32_t value)
44
45
46
47
48
49
50
51
52
{
    return __builtin_amdgcn_readfirstlane(value);
}

template <
    typename Object,
    typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto readfirstlane(const Object& obj)
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
53
54
    constexpr std::size_t SgprSize   = 4;
    constexpr std::size_t ObjectSize = sizeof(Object);
55

Po-Yen, Chen's avatar
Po-Yen, Chen committed
56
    using Sgpr = detail::get_unsigned_int_t<SgprSize>;
57

58
    alignas(Object) std::byte to_obj[ObjectSize];
59

60
    auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
61
62
63
64

    constexpr std::size_t RemainedSize             = ObjectSize % SgprSize;
    constexpr std::size_t CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
    static_for<0, CompleteSgprCopyBoundary, SgprSize>{}([&](auto offset) {
65
66
        *reinterpret_cast<Sgpr*>(to_obj + offset) =
            readfirstlane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
67
68
69
70
    });

    if constexpr(0 < RemainedSize)
    {
Po-Yen, Chen's avatar
Po-Yen, Chen committed
71
        using Carrier = detail::get_unsigned_int_t<RemainedSize>;
72

73
74
        *reinterpret_cast<Carrier>(to_obj + CompleteSgprCopyBoundary) =
            readfirstlane(*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
75
76
    }

77
    /// NOTE: Implicitly start object lifetime. It's better to use
78
    ///        std::start_lifetime_at() in this scenario
79
    return *reinterpret_cast<Object*>(to_obj);
80
81
82
}

} // namespace ck