readfirstlane.hpp 2.07 KB
Newer Older
1
2
3
4
5
6
7
8
9
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/ck.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"

10
#include <cstddef>
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include <cstdint>
#include <type_traits>

namespace ck {
namespace detail {

template <std::size_t Size>
struct get_signed_int;

template <>
struct get_signed_int<1>
{
    using type = std::int8_t;
};

template <>
struct get_signed_int<2>
{
    using type = std::int16_t;
};

template <>
struct get_signed_int<4>
{
    using type = std::int32_t;
};

template <std::size_t Size>
using get_signed_int_t = typename get_signed_int<Size>::type;

} // namespace detail

43
__device__ inline std::int32_t readfirstlane(std::int32_t value)
44
45
46
47
48
49
50
51
52
{
    return __builtin_amdgcn_readfirstlane(value);
}

template <
    typename Object,
    typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto readfirstlane(const Object& obj)
{
Po-Yen, Chen's avatar
Po-Yen, Chen committed
53
54
    constexpr std::size_t SgprSize   = 4;
    constexpr std::size_t ObjectSize = sizeof(Object);
55
56
57

    using Sgpr = detail::get_signed_int_t<SgprSize>;

58
    alignas(Object) std::byte to_obj[ObjectSize];
59

60
    auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
61
62
63
64

    constexpr std::size_t RemainedSize             = ObjectSize % SgprSize;
    constexpr std::size_t CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
    static_for<0, CompleteSgprCopyBoundary, SgprSize>{}([&](auto offset) {
65
66
        *reinterpret_cast<Sgpr*>(to_obj + offset) =
            readfirstlane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
67
68
69
70
71
72
    });

    if constexpr(0 < RemainedSize)
    {
        using Carrier = detail::get_signed_int_t<RemainedSize>;

73
74
        *reinterpret_cast<Carrier>(to_obj + CompleteSgprCopyBoundary) =
            readfirstlane(*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
75
76
    }

77
    /// NOTE: Implicitly start object lifetime. It's better to use
78
    ///        std::start_lifetime_at() in this scenario
79
    return *reinterpret_cast<Object*>(to_obj);
80
81
82
}

} // namespace ck