utils.h 9.19 KB
Newer Older
PanZezhongQY's avatar
PanZezhongQY committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#ifndef __UTILS_H__
#define __UTILS_H__

#include "infiniop/tensor_descriptor.h"
#include <algorithm>
#include <iostream>
#include <numeric>
#include <stdio.h>
#include <stdlib.h>
#include <vector>

/* This file contains some useful macros and helper functions */

// Integer ceiling division: for non-negative x and positive y, yields the
// smallest q with q * y >= x. Every argument use is parenthesized so that
// compound expressions such as ROUND_UP_DIV(n, a + b) expand correctly.
// Note: y is expanded more than once, so avoid arguments with side effects.
#define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))

16
17
18
19
20
21
22
23
// Evaluates `call` exactly once. If the result compares equal to `target`
// (i.e. `target` is the failure value), reports the value and the source
// location on std::cerr and returns `errCode` from the enclosing function.
// The diagnostic names the offending value only: "expected X but got X"
// would be self-contradictory, since this branch runs when they are equal.
#define CHECK_ERROR(call, target, errCode)                                     \
    do {                                                                       \
        if (auto value = (call); value == (target)) {                          \
            std::cerr << "Error: got failure value " << value << " in file "   \
                      << __FILE__ << ", function " << __func__ << ", line "    \
                      << __LINE__ << std::endl;                                \
            return (errCode);                                                  \
        }                                                                      \
    } while (0)

26
27
// Executes the statement `expr` and then applies CHECK_ERROR to `value`,
// returning `errCode` from the enclosing function when `value` equals
// `target`. The expansion is deliberately NOT wrapped in a single scope, so
// anything declared by `expr` stays visible to the code that follows; as a
// consequence it expands to two statements and must not be used as the
// unbraced body of an if/for/while.
#define CREATE_CHECK_ERROR(expr, value, target, errCode)                       \
    expr;                                                                      \
    CHECK_ERROR(value, target, errCode)

30
31
32
33
34
35
36
37
// Evaluates `call` exactly once. If the result differs from `target` (the
// expected/success value), reports both values and the source location on
// std::cerr and returns the obtained value from the enclosing function, so
// the enclosing function's return type must be compatible with the type of
// `call`.
#define CHECK_STATUS(call, target)                                             \
    do {                                                                       \
        if (auto value = (call); value != (target)) {                          \
            std::cerr << "Error: expected " << (target) << " but got "         \
                      << value << " in file " << __FILE__ << ", function "     \
                      << __func__ << ", line " << __LINE__ << std::endl;       \
            return value;                                                      \
        }                                                                      \
    } while (0)

PanZezhong's avatar
PanZezhong committed
40
// Returns a vector of desc->ndim entries, each being the corresponding entry
// of desc->strides multiplied by infiniSizeof(desc->dtype).
inline std::vector<int64_t> getByteStrides(infiniopTensorDescriptor_t desc) {
    // The element size does not change across dimensions; convert it to a
    // signed value once so the multiplication below is done in signed
    // arithmetic instead of silently promoting the stride to unsigned.
    const auto elem_size = static_cast<int64_t>(infiniSizeof(desc->dtype));
    std::vector<int64_t> strides(desc->ndim);
    for (size_t i = 0; i < strides.size(); i++) {
        strides[i] = static_cast<int64_t>(desc->strides[i]) * elem_size;
    }
    return strides;
}

PanZezhong's avatar
PanZezhong committed
48
49
50
51
52
53
54
55
// Returns the product of all desc->ndim entries of desc->shape multiplied by
// infiniSizeof(desc->dtype). With ndim == 0 the product is 1.
inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
    size_t element_count = 1;
    size_t axis = desc->ndim;
    // Walk the dimensions from last to first; the product is order-independent
    while (axis > 0) {
        --axis;
        element_count *= desc->shape[axis];
    }
    const size_t element_bytes = infiniSizeof(desc->dtype);
    return element_count * element_bytes;
}

PanZezhongQY's avatar
PanZezhongQY committed
56
57
58
// Calculates the broadcasted shape of two tensors.
//
// shape1/ndim1, shape2/ndim2: the input shapes and their ranks.
// broadcast_shape, padded_shape1, padded_shape2: caller-provided output
//   buffers of at least max_rank elements each. The padded buffers receive
//   the input shapes right-aligned and left-filled with 1s.
// max_rank: rank of the broadcasted result.
//
// Returns true on success. Returns false if either input rank exceeds
// max_rank (nothing is written in that case) or if some dimension pair is
// incompatible (the output buffers are then only partially meaningful).
inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
                              const uint64_t *shape2, uint64_t ndim2,
                              uint64_t *broadcast_shape,
                              uint64_t *padded_shape1, uint64_t *padded_shape2,
                              uint64_t max_rank) {
    // Without this guard `max_rank - ndim` would wrap around and the copies
    // below would write outside the padded buffers.
    if (ndim1 > max_rank || ndim2 > max_rank) {
        return false;
    }

    // prepending and initializing
    std::fill(padded_shape1, padded_shape1 + max_rank, uint64_t{1});
    std::fill(padded_shape2, padded_shape2 + max_rank, uint64_t{1});
    std::copy(shape1, shape1 + ndim1, padded_shape1 + (max_rank - ndim1));
    std::copy(shape2, shape2 + ndim2, padded_shape2 + (max_rank - ndim2));

    // compute broadcasted shape
    for (uint64_t i = 0; i < max_rank; ++i) {
        const uint64_t d1 = padded_shape1[i];
        const uint64_t d2 = padded_shape2[i];
        if (d1 == d2 || d2 == 1) {
            broadcast_shape[i] = d1;
        } else if (d1 == 1) {
            // The size-1 side stretches to the other extent. Picking the
            // other side (rather than the maximum) keeps a zero-sized
            // dimension zero-sized.
            broadcast_shape[i] = d2;
        } else {
            return false;
        }
    }

    return true;
}

81
82
83
84
85
// check if the shape of tensor c is valid after broadcasting tensors a and b
// and also get the broadcasted shapes
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c,
PanZezhongQY's avatar
PanZezhongQY committed
86
                                  uint64_t broadcast_ndim) {
87
88
    std::vector<uint64_t> broadcast_shape_(broadcast_ndim),
        padded_shape1_(broadcast_ndim), padded_shape2_(broadcast_ndim);
PanZezhongQY's avatar
PanZezhongQY committed
89
90
91
    auto broadcast_shape = broadcast_shape_.data(),
         padded_shape1 = padded_shape1_.data(),
         padded_shape2 = padded_shape2_.data();
92
93
94
95
    if (broadcast_ndim != c->ndim ||
        !getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim,
                           broadcast_shape, padded_shape1, padded_shape2,
                           broadcast_ndim)) {
PanZezhongQY's avatar
PanZezhongQY committed
96
97
        return false;
    }
98
99
    return std::equal(broadcast_shape, broadcast_shape + broadcast_ndim,
                      c->shape);
PanZezhongQY's avatar
PanZezhongQY committed
100
101
}

102
103
104
105
// Checks whether the shape of tensor src can be broadcast to the shape of
// tensor dst.
//
// src's shape is right-aligned against dst's shape. Each src dimension must
// either be 1 or equal the dst dimension it lines up with. Returns false if
// src has more dimensions than dst.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t dst,
                                  infiniopTensorDescriptor_t src) {
    if (dst->ndim < src->ndim) {
        return false;
    }
    // Leading dst dimensions with no src counterpart behave as if src had
    // size 1 there, which is always broadcastable, so only the trailing
    // src->ndim dimensions need to be compared. This avoids building a
    // temporary padded copy of the shape.
    const size_t offset = dst->ndim - src->ndim;
    for (size_t i = 0; i < src->ndim; ++i) {
        const size_t src_dim = src->shape[i];
        if (src_dim != 1 && src_dim != dst->shape[offset + i]) {
            return false;
        }
    }
    return true;
}

// check if the shape of tensor c is valid after broadcasting tensors a and b
123
124
125
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c) {
PanZezhongQY's avatar
PanZezhongQY committed
126
127
128
129
130
    return isValidBroadcastShape(a, b, c, std::max(a->ndim, b->ndim));
}


// permute the dimensions of a tensor descriptor
131
132
inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
                                          const std::vector<size_t> &order) {
PanZezhongQY's avatar
PanZezhongQY committed
133
134
135
136
137
138
139
140
141
142
143
144
145
    size_t ndim = desc->ndim;
    if (order.size() != ndim) {
        return nullptr;
    }
    size_t *shape = new size_t[ndim];
    int64_t *strides = new int64_t[ndim];
    for (size_t i = 0; i < ndim; i++) {
        if (std::find(order.begin(), order.end(), i) == order.end()) {
            return nullptr;
        }
        shape[i] = desc->shape[order[i]];
        strides[i] = desc->strides[order[i]];
    }
146
    return new InfiniopTensorDescriptor{desc->dtype, ndim, shape, strides};
PanZezhongQY's avatar
PanZezhongQY committed
147
148
}

149
150
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are
// contiguous
PanZezhong's avatar
PanZezhong committed
151
inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
152
                          size_t dim_start, size_t dim_end) {
PanZezhongQY's avatar
PanZezhongQY committed
153
    for (size_t i = dim_start + 1; i <= dim_end; i++) {
154
155
        if (desc->strides[i - 1] !=
            static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) {
PanZezhongQY's avatar
PanZezhongQY committed
156
157
158
159
160
161
            return false;
        }
    }
    return true;
}

PanZezhong's avatar
PanZezhong committed
162
inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
PanZezhongQY's avatar
PanZezhongQY committed
163
164
165
    if (desc->ndim == 0) {
        return true;
    }
PanZezhong's avatar
PanZezhong committed
166
    return isContiguous(desc, 0, desc->ndim - 1);
PanZezhongQY's avatar
PanZezhongQY committed
167
168
169
}

// merge the dimensions [dim_start, dim_end] of a tensor descriptor
PanZezhong's avatar
PanZezhong committed
170
inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
171
                                            size_t dim_start, size_t dim_end) {
PanZezhongQY's avatar
PanZezhongQY committed
172
173
174
175
176
177
178
179
180
181
182
183
184
185
    size_t ndim = desc->ndim;
    if (dim_start > dim_end || dim_end >= ndim) {
        return nullptr;
    }

    size_t new_ndim = ndim - (dim_end - dim_start);
    size_t *new_shape = new size_t[new_ndim];
    int64_t *new_strides = new int64_t[new_ndim];
    size_t index = 0;
    for (size_t i = 0; i < dim_start; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
PanZezhong's avatar
PanZezhong committed
186
    if (!isContiguous(desc, dim_start, dim_end)) {
PanZezhongQY's avatar
PanZezhongQY committed
187
188
189
190
191
192
193
194
195
196
197
198
199
        return nullptr;
    }
    new_shape[index] = 1;
    for (size_t i = dim_start; i <= dim_end; i++) {
        new_shape[index] *= desc->shape[i];
    }
    new_strides[index] = desc->strides[dim_end];
    index++;
    for (size_t i = dim_end + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
200
201
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
PanZezhongQY's avatar
PanZezhongQY committed
202
203
204
}

// split the dimension dim of a tensor descriptor into multiple dimensions
PanZezhong's avatar
PanZezhong committed
205
inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
206
207
                                            size_t dim,
                                            const std::vector<size_t> &dims) {
PanZezhongQY's avatar
PanZezhongQY committed
208
    size_t ndim = desc->ndim;
209
210
    if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1,
                                            std::multiplies{})) {
PanZezhongQY's avatar
PanZezhongQY committed
211
212
213
214
215
216
217
218
219
220
221
222
223
        return nullptr;
    }
    size_t new_ndim = ndim + dims.size() - 1;
    size_t *new_shape = new size_t[new_ndim];
    int64_t *new_strides = new int64_t[new_ndim];
    size_t index = 0;
    for (size_t i = 0; i < dim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    for (size_t i = 0; i < dims.size(); i++) {
        new_shape[index] = dims[i];
224
225
226
227
        new_strides[index] =
            desc->strides[dim] * desc->shape[dim] /
            std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1,
                            std::multiplies<size_t>());
PanZezhongQY's avatar
PanZezhongQY committed
228
229
230
231
232
233
234
        index++;
    }
    for (size_t i = dim + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
235
236
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
PanZezhongQY's avatar
PanZezhongQY committed
237
238
}

239
#endif // __UTILS_H__