readfirstlane.hpp 1.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/ck.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"

10
#include <cstddef>
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <cstdint>
#include <type_traits>

namespace ck {
namespace detail {

template <std::size_t Size>
struct get_signed_int;

template <>
struct get_signed_int<1>
{
    using type = std::int8_t;
};

template <>
struct get_signed_int<2>
{
    using type = std::int16_t;
};

template <>
struct get_signed_int<4>
{
    using type = std::int32_t;
};

template <std::size_t Size>
using get_signed_int_t = typename get_signed_int<Size>::type;

} // namespace detail

__device__ std::int32_t readfirstlane(std::int32_t value)
{
    return __builtin_amdgcn_readfirstlane(value);
}

template <
    typename Object,
    typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto readfirstlane(const Object& obj)
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
53
54
    constexpr std::size_t SgprSize   = 4;
    constexpr std::size_t ObjectSize = sizeof(Object);
55
56
57

    using Sgpr = detail::get_signed_int_t<SgprSize>;

58
    alignas(Object) std::byte memory[ObjectSize];
59

60
    const auto* from = reinterpret_cast<const std::byte*>(&obj);
61
62
63
64
65
    static_for<0, ObjectSize, SgprSize>{}([&](auto offset) {
        *reinterpret_cast<Sgpr*>(memory + offset) =
            readfirstlane(*reinterpret_cast<const Sgpr*>(from + offset));
    });

Po-Yen, Chen's avatar
Po-Yen, Chen committed
66
    constexpr std::size_t RemainedSize = ObjectSize % SgprSize;
67
68
69
70
71
72
73
74
75
76
77
78
79
80
    if constexpr(0 < RemainedSize)
    {
        using Carrier = detail::get_signed_int_t<RemainedSize>;

        constexpr std::size_t offset = SgprSize * math::integer_divide_floor(ObjectSize, SgprSize);

        *reinterpret_cast<Carrier>(memory + offset) =
            readfirstlane(*reinterpret_cast<const Carrier*>(from + offset));
    }

    return *reinterpret_cast<Object*>(memory);
}

} // namespace ck