readfirstlane.hpp 2.12 KB
Newer Older
1
2
3
4
5
6
7
8
9
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/ck.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"

10
#include <cstddef>
11
12
13
14
15
16
#include <cstdint>
#include <type_traits>

namespace ck {
namespace detail {

Po-Yen, Chen's avatar
Po-Yen, Chen committed
17
template <unsigned Size>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
18
struct get_unsigned_int;
19
20

template <>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
21
struct get_unsigned_int<1>
22
{
23
    using type = uint8_t;
24
25
26
};

template <>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
27
struct get_unsigned_int<2>
28
{
29
    using type = uint16_t;
30
31
32
};

template <>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
33
struct get_unsigned_int<4>
34
{
35
    using type = uint32_t;
36
37
};

Po-Yen, Chen's avatar
Po-Yen, Chen committed
38
template <unsigned Size>
Po-Yen, Chen's avatar
Po-Yen, Chen committed
39
using get_unsigned_int_t = typename get_unsigned_int<Size>::type;
40
41
42

} // namespace detail

43
__device__ inline int32_t amd_wave_read_first_lane(int32_t value)
44
45
46
47
48
49
50
{
    return __builtin_amdgcn_readfirstlane(value);
}

template <
    typename Object,
    typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
51
__device__ auto amd_wave_read_first_lane(const Object& obj)
52
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
53
54
55
    using Size                = unsigned;
    constexpr Size SgprSize   = 4;
    constexpr Size ObjectSize = sizeof(Object);
56

Po-Yen, Chen's avatar
Po-Yen, Chen committed
57
    using Sgpr = detail::get_unsigned_int_t<SgprSize>;
58

59
    alignas(Object) std::byte to_obj[ObjectSize];
60

61
    auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
62

Po-Yen, Chen's avatar
Po-Yen, Chen committed
63
64
    constexpr Size RemainedSize             = ObjectSize % SgprSize;
    constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
65
66
    for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize)
    {
67
        *reinterpret_cast<Sgpr*>(to_obj + offset) =
68
            amd_wave_read_first_lane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
69
    }
70
71
72

    if constexpr(0 < RemainedSize)
    {
Po-Yen, Chen's avatar
Po-Yen, Chen committed
73
        using Carrier = detail::get_unsigned_int_t<RemainedSize>;
74

75
        *reinterpret_cast<Carrier>(to_obj + CompleteSgprCopyBoundary) =
76
            amd_wave_read_first_lane(*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
77
78
    }

Po-Yen, Chen's avatar
Po-Yen, Chen committed
79
80
    /// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this
    /// scenario
81
    return *reinterpret_cast<Object*>(to_obj);
82
83
84
}

} // namespace ck