tile_program.hpp 2.85 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
3
4
5
6
7
8

#include <hip/hip_runtime.h>

#include "ck/ck.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"

Chao Liu's avatar
Chao Liu committed
9
10
11
12
// Meta data for GPU
// TODO: do we need to take care of data alignment in code or it's done by compiler?
template <ck::index_t kSize>
struct MetaData
Chao Liu's avatar
Chao Liu committed
13
{
Chao Liu's avatar
Chao Liu committed
14
15
    char p_data_[kSize];

Chao Liu's avatar
Chao Liu committed
16
17
18
19
    ck::index_t size_ = 0;
    ck::index_t pos_  = 0;

    __host__ __device__ void reset()
Chao Liu's avatar
Chao Liu committed
20
    {
Chao Liu's avatar
Chao Liu committed
21
22
23
        size_ = 0;
        pos_  = 0;
    }
Chao Liu's avatar
Chao Liu committed
24

Chao Liu's avatar
Chao Liu committed
25
    __device__ void reset_pos() { pos_ = 0; }
Chao Liu's avatar
Chao Liu committed
26

Chao Liu's avatar
Chao Liu committed
27
28
    // push meta data on host
    // TODO: correct forwarding?
Chao Liu's avatar
Chao Liu committed
29
    template <typename T>
Chao Liu's avatar
Chao Liu committed
30
    __host__ auto push(T&& a)
Chao Liu's avatar
Chao Liu committed
31
    {
Chao Liu's avatar
Chao Liu committed
32
        assert(size_ + sizeof(Type) <= kSize);
Chao Liu's avatar
Chao Liu committed
33

Chao Liu's avatar
Chao Liu committed
34
        using Type = ck::remove_cvref_t<T>;
Chao Liu's avatar
Chao Liu committed
35

Chao Liu's avatar
Chao Liu committed
36
37
38
39
40
        *reinterpret_cast<Type*>(p_data_ + size_) = a;

        size_ += sizeof(Type);

        return ck::forwarder{}(a);
Chao Liu's avatar
Chao Liu committed
41
42
    }

Chao Liu's avatar
Chao Liu committed
43
44
    // pull meta data on device
    // TODO: correct forwarding?
Chao Liu's avatar
Chao Liu committed
45
    template <typename T>
Chao Liu's avatar
Chao Liu committed
46
    __device__ auto pull()
Chao Liu's avatar
Chao Liu committed
47
    {
Chao Liu's avatar
Chao Liu committed
48
        using Type = ck::remove_cvref_t<T>;
Chao Liu's avatar
Chao Liu committed
49

Chao Liu's avatar
Chao Liu committed
50
51
52
        Type a = *reinterpret_cast<Type*>(p_data_ + pos_);

        pos_ += sizeof(Type);
Chao Liu's avatar
Chao Liu committed
53
54
55

        return a;
    }
Chao Liu's avatar
Chao Liu committed
56
};
Chao Liu's avatar
Chao Liu committed
57

Chao Liu's avatar
Chao Liu committed
58
// namespace tp (for tile programming)
Chao Liu's avatar
Chao Liu committed
59
struct ProgramServer
Chao Liu's avatar
Chao Liu committed
60
{
Chao Liu's avatar
Chao Liu committed
61
62
63
64
    // meta data on device
    MetaData<1024> meta_data_;

    __host__ void cpu_init() { meta_data_.reset(); }
Chao Liu's avatar
Chao Liu committed
65

Chao Liu's avatar
Chao Liu committed
66
    __device__ void gpu_init() { meta_data_.reset_pos(); }
Chao Liu's avatar
Chao Liu committed
67

Chao Liu's avatar
Chao Liu committed
68
    // push meta data on host
Chao Liu's avatar
Chao Liu committed
69
    template <typename T>
Chao Liu's avatar
Chao Liu committed
70
    __host__ auto operator()(T&& a)
Chao Liu's avatar
Chao Liu committed
71
    {
Chao Liu's avatar
Chao Liu committed
72
        return ck::forwarder{}(meta_data_.push(a));
Chao Liu's avatar
Chao Liu committed
73
    }
Chao Liu's avatar
Chao Liu committed
74

Chao Liu's avatar
Chao Liu committed
75
    // push meta data on host
Chao Liu's avatar
Chao Liu committed
76
    template <typename T>
Chao Liu's avatar
Chao Liu committed
77
    __device__ auto operator()(T&&)
Chao Liu's avatar
Chao Liu committed
78
    {
Chao Liu's avatar
Chao Liu committed
79
        return ck::forwarder{}(meta_data_.pull<T>());
Chao Liu's avatar
Chao Liu committed
80
    }
Chao Liu's avatar
Chao Liu committed
81
82
83
84
85
86
87
88

    __host__ static ck::index_t get_block_1d_id() { return -1; }

    __host__ static ck::index_t get_grid_size() { return -1; }

    __device__ static ck::index_t get_block_1d_id() { return ck::get_block_1d_id(); }

    __device__ static ck::index_t get_grid_size() { return ck::get_grid_size(); }
Chao Liu's avatar
Chao Liu committed
89
90
91
92
93
94
95
96
97
98
99
100
101

    // TODO: correct forwarding?
    template <typename T>
    __host__ static constexpr auto read_first_lane(T&& a)
    {
        return ck::forwarder{}(a);
    }

    template <typename T>
    __device__ static constexpr auto read_first_lane(T&& a)
    {
        return __builtin_amdgcn_readfirstlane(a);
    }
Chao Liu's avatar
Chao Liu committed
102
103
};

Chao Liu's avatar
Chao Liu committed
104
105
template <typename Server, typename Program, typename... Xs>
__global__ void gpu_program_wrapper(Server server, Program f, Xs... xs)
Chao Liu's avatar
Chao Liu committed
106
{
Chao Liu's avatar
Chao Liu committed
107
108
    server.gpu_init();
    f(server, xs...);
Chao Liu's avatar
Chao Liu committed
109
110
}

Chao Liu's avatar
Chao Liu committed
111
112
template <typename Server, typename Program, typename... Xs>
void launch(Server server, Program f, dim3 grid_dim, dim3 block_dim, Xs... xs)
Chao Liu's avatar
Chao Liu committed
113
{
Chao Liu's avatar
Chao Liu committed
114
    server.cpu_init();
Chao Liu's avatar
Chao Liu committed
115

Chao Liu's avatar
Chao Liu committed
116
    f(server, xs...);
Chao Liu's avatar
Chao Liu committed
117

Chao Liu's avatar
Chao Liu committed
118
    printf("meta data size %d\n", server.meta_data_.size_);
Chao Liu's avatar
Chao Liu committed
119

Chao Liu's avatar
Chao Liu committed
120
    gpu_program_wrapper<Server, Program><<<grid_dim, block_dim, 0, nullptr>>>(server, f, xs...);
Chao Liu's avatar
Chao Liu committed
121
}