utils.h 8.9 KB
Newer Older
PanZezhongQY's avatar
PanZezhongQY committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#ifndef __UTILS_H__
#define __UTILS_H__

#include "infiniop/tensor_descriptor.h"
#include <algorithm>
#include <iostream>
#include <numeric>
#include <stdio.h>
#include <stdlib.h>
#include <vector>

/* This file contains some useful macros and helper functions */

// Integer ceiling division: evaluates to (x + y - 1) / y.
// Every use of an argument is parenthesized so that compound expressions such
// as ROUND_UP_DIV(n, a + b) expand with the intended precedence.
// Note: `y` appears twice in the expansion, so avoid arguments with side effects.
#define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))

16
17
18
19
20
21
22
23
// Evaluates `call` once and, if the result EQUALS `target`, writes a diagnostic
// to std::cerr and returns `errCode` from the enclosing function.
//
// The temporary holding the result uses a prefixed name: a plain `value` would
// shadow any caller identifier called `value` that appears in the arguments
// (e.g. CHECK_ERROR(value, ...) would expand to `auto value = (value)`).
//
// NOTE(review): the message text reads "expected <target> but got <value>" even
// though this branch fires when the two are equal; the wording is kept as-is
// because callers or log scrapers may depend on it.
#define CHECK_ERROR(call, target, errCode)                                    \
    do {                                                                      \
        if (auto infini_check_value_ = (call);                                \
            infini_check_value_ == (target)) {                                \
            std::cerr << "Error: expected " << (target) << " but got "        \
                      << infini_check_value_ << " in file " << __FILE__       \
                      << ", function " << __func__ << ", line " << __LINE__   \
                      << std::endl;                                           \
            return (errCode);                                                 \
        }                                                                     \
    } while (0)

26
27
// Executes the statement `expr`, then applies CHECK_ERROR to `value`.
// The expansion is two consecutive statements that are not wrapped in braces,
// so anything declared by `expr` stays visible in the enclosing scope; for the
// same reason it must not be used as the unbraced body of an if/else/loop.
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
    expr;                                                \
    CHECK_ERROR(value, target, errCode)

30
31
32
33
34
35
36
37
// Evaluates `call` once and, if the result DIFFERS from `target`, writes a
// diagnostic to std::cerr and returns that result from the enclosing function.
//
// The temporary holding the result uses a prefixed name: a plain `value` would
// shadow any caller identifier called `value` that appears in the arguments
// (e.g. CHECK_STATUS(value, ...) would expand to `auto value = (value)`).
#define CHECK_STATUS(call, target)                                            \
    do {                                                                      \
        if (auto infini_check_value_ = (call);                                \
            infini_check_value_ != (target)) {                                \
            std::cerr << "Error: expected " << (target) << " but got "        \
                      << infini_check_value_ << " in file " << __FILE__       \
                      << ", function " << __func__ << ", line " << __LINE__   \
                      << std::endl;                                           \
            return infini_check_value_;                                       \
        }                                                                     \
    } while (0)

PanZezhong's avatar
PanZezhong committed
40
// Returns one entry per dimension of `desc`, each being desc->strides[i]
// multiplied by infiniSizeof(desc->dtype) (presumably turning element strides
// into byte strides — confirm against the descriptor's stride convention).
inline std::vector<int64_t> getByteStrides(infiniopTensorDescriptor_t desc) {
    std::vector<int64_t> byte_strides(desc->ndim);
    uint64_t dim = 0;
    for (int64_t &scaled : byte_strides) {
        scaled = desc->strides[dim] * infiniSizeof(desc->dtype);
        ++dim;
    }
    return byte_strides;
}

PanZezhong's avatar
PanZezhong committed
48
49
50
51
52
53
54
55
// Returns the product of all extents in desc->shape (1 when ndim is 0)
// multiplied by infiniSizeof(desc->dtype).
inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
    size_t element_count = 1;
    const auto *extent = desc->shape;
    const auto *const shape_end = desc->shape + desc->ndim;
    while (extent != shape_end) {
        element_count *= *extent;
        ++extent;
    }
    return element_count * infiniSizeof(desc->dtype);
}

PanZezhongQY's avatar
PanZezhongQY committed
56
57
58
// calculate the broadcasted shape for two tensors
//
// shape1/ndim1, shape2/ndim2: the two input shapes.
// broadcast_shape, padded_shape1, padded_shape2: output buffers, each with room
//     for max_rank entries. The padded shapes are the inputs right-aligned to
//     max_rank dimensions with leading 1s.
// max_rank: rank of the broadcast result; must be >= ndim1 and >= ndim2.
//
// Returns true and fills broadcast_shape when, for every dimension, the padded
// extents are equal or one of them is 1. Returns false when the shapes are
// incompatible or when either input has more than max_rank dimensions (in the
// latter case no output buffer is written).
inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
                              const uint64_t *shape2, uint64_t ndim2,
                              uint64_t *broadcast_shape,
                              uint64_t *padded_shape1, uint64_t *padded_shape2,
                              uint64_t max_rank) {
    // Without this guard `padded + max_rank - ndim` would point before the
    // start of the buffer and the copies below would write out of bounds.
    if (ndim1 > max_rank || ndim2 > max_rank) {
        return false;
    }

    // prepending and initializing
    std::fill(padded_shape1, padded_shape1 + max_rank, uint64_t{1});
    std::fill(padded_shape2, padded_shape2 + max_rank, uint64_t{1});
    std::copy(shape1, shape1 + ndim1, padded_shape1 + (max_rank - ndim1));
    std::copy(shape2, shape2 + ndim2, padded_shape2 + (max_rank - ndim2));

    // compute broadcasted shape
    for (uint64_t i = 0; i < max_rank; ++i) {
        const uint64_t lhs = padded_shape1[i];
        const uint64_t rhs = padded_shape2[i];
        if (lhs != rhs && lhs != 1 && rhs != 1) {
            return false;
        }
        broadcast_shape[i] = std::max(lhs, rhs);
    }

    return true;
}

80
81
82
83
84
// check if the shape of tensor c is valid after broadcasting tensors a and b
// and also get the broadcasted shapes
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c,
PanZezhongQY's avatar
PanZezhongQY committed
85
                                  uint64_t broadcast_ndim) {
86
87
    std::vector<uint64_t> broadcast_shape_(broadcast_ndim),
        padded_shape1_(broadcast_ndim), padded_shape2_(broadcast_ndim);
PanZezhongQY's avatar
PanZezhongQY committed
88
89
90
    auto broadcast_shape = broadcast_shape_.data(),
         padded_shape1 = padded_shape1_.data(),
         padded_shape2 = padded_shape2_.data();
91
    if (broadcast_ndim != c->ndim || !getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim, broadcast_shape, padded_shape1, padded_shape2, broadcast_ndim)) {
PanZezhongQY's avatar
PanZezhongQY committed
92
93
        return false;
    }
94
95
    return std::equal(broadcast_shape, broadcast_shape + broadcast_ndim,
                      c->shape);
PanZezhongQY's avatar
PanZezhongQY committed
96
97
}

98
99
100
101
// Checks whether src's shape can be broadcast to dst's shape.
// src is right-aligned against dst; every src extent must either equal the
// corresponding dst extent or be 1. Returns false if src has more dimensions
// than dst.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t dst,
                                  infiniopTensorDescriptor_t src) {
    if (src->ndim > dst->ndim) {
        return false;
    }

    // Leading dst dimensions that src lacks behave as extent 1 and therefore
    // always pass, so only the overlapping trailing dimensions are compared.
    const size_t lead = dst->ndim - src->ndim;
    for (size_t k = 0; k < src->ndim; ++k) {
        const size_t extent = src->shape[k];
        if (extent != 1 && extent != dst->shape[lead + k]) {
            return false;
        }
    }
    return true;
}

// check if the shape of tensor c is valid after broadcasting tensors a and b
119
120
121
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c) {
PanZezhongQY's avatar
PanZezhongQY committed
122
123
124
125
    return isValidBroadcastShape(a, b, c, std::max(a->ndim, b->ndim));
}

// permute the dimensions of a tensor descriptor
126
127
inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
                                          const std::vector<size_t> &order) {
PanZezhongQY's avatar
PanZezhongQY committed
128
129
130
131
132
133
134
135
136
137
138
139
140
    size_t ndim = desc->ndim;
    if (order.size() != ndim) {
        return nullptr;
    }
    size_t *shape = new size_t[ndim];
    int64_t *strides = new int64_t[ndim];
    for (size_t i = 0; i < ndim; i++) {
        if (std::find(order.begin(), order.end(), i) == order.end()) {
            return nullptr;
        }
        shape[i] = desc->shape[order[i]];
        strides[i] = desc->strides[order[i]];
    }
141
    return new InfiniopTensorDescriptor{desc->dtype, ndim, shape, strides};
PanZezhongQY's avatar
PanZezhongQY committed
142
143
}

144
145
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are
// contiguous
PanZezhong's avatar
PanZezhong committed
146
inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
147
                         size_t dim_start, size_t dim_end) {
PanZezhongQY's avatar
PanZezhongQY committed
148
    for (size_t i = dim_start + 1; i <= dim_end; i++) {
149
        if (desc->strides[i - 1] != static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) {
PanZezhongQY's avatar
PanZezhongQY committed
150
151
152
153
154
155
            return false;
        }
    }
    return true;
}

PanZezhong's avatar
PanZezhong committed
156
inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
PanZezhongQY's avatar
PanZezhongQY committed
157
158
159
    if (desc->ndim == 0) {
        return true;
    }
PanZezhong's avatar
PanZezhong committed
160
    return isContiguous(desc, 0, desc->ndim - 1);
PanZezhongQY's avatar
PanZezhongQY committed
161
162
163
}

// merge the dimensions [dim_start, dim_end] of a tensor descriptor
PanZezhong's avatar
PanZezhong committed
164
inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
165
                                           size_t dim_start, size_t dim_end) {
PanZezhongQY's avatar
PanZezhongQY committed
166
167
168
169
170
171
172
173
174
175
176
177
178
179
    size_t ndim = desc->ndim;
    if (dim_start > dim_end || dim_end >= ndim) {
        return nullptr;
    }

    size_t new_ndim = ndim - (dim_end - dim_start);
    size_t *new_shape = new size_t[new_ndim];
    int64_t *new_strides = new int64_t[new_ndim];
    size_t index = 0;
    for (size_t i = 0; i < dim_start; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
PanZezhong's avatar
PanZezhong committed
180
    if (!isContiguous(desc, dim_start, dim_end)) {
PanZezhongQY's avatar
PanZezhongQY committed
181
182
183
184
185
186
187
188
189
190
191
192
193
        return nullptr;
    }
    new_shape[index] = 1;
    for (size_t i = dim_start; i <= dim_end; i++) {
        new_shape[index] *= desc->shape[i];
    }
    new_strides[index] = desc->strides[dim_end];
    index++;
    for (size_t i = dim_end + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
194
195
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
PanZezhongQY's avatar
PanZezhongQY committed
196
197
198
}

// split the dimension dim of a tensor descriptor into multiple dimensions
PanZezhong's avatar
PanZezhong committed
199
inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
200
201
                                           size_t dim,
                                           const std::vector<size_t> &dims) {
PanZezhongQY's avatar
PanZezhongQY committed
202
    size_t ndim = desc->ndim;
203
    if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies{})) {
PanZezhongQY's avatar
PanZezhongQY committed
204
205
206
207
208
209
210
211
212
213
214
215
216
        return nullptr;
    }
    size_t new_ndim = ndim + dims.size() - 1;
    size_t *new_shape = new size_t[new_ndim];
    int64_t *new_strides = new int64_t[new_ndim];
    size_t index = 0;
    for (size_t i = 0; i < dim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    for (size_t i = 0; i < dims.size(); i++) {
        new_shape[index] = dims[i];
217
        new_strides[index] = desc->strides[dim] * desc->shape[dim] / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
PanZezhongQY's avatar
PanZezhongQY committed
218
219
220
221
222
223
224
        index++;
    }
    for (size_t i = dim + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
225
226
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
PanZezhongQY's avatar
PanZezhongQY committed
227
228
}

229
#endif // __UTILS_H__