// tensor.cpp — Tensor and TensorDesc implementations.
#include "../tensor.hpp"
#include "../utils.hpp"

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <mutex>
#include <numeric>
#include <sstream>
#include <vector>

// Factory for a descriptor with explicit strides.
// The constructor is private, so make_shared cannot be used here; the raw
// pointer is handed straight to shared_ptr for ownership.
std::shared_ptr<TensorDesc>
TensorDesc::create(infiniDtype_t dtype, const std::vector<size_t> &shape,
                   const std::vector<ptrdiff_t> &strides) {
    return std::shared_ptr<TensorDesc>(new TensorDesc(dtype, shape, strides));
}

// Factory for a contiguous (row-major) descriptor: strides are derived from
// the shape, with the innermost dimension having stride 1.
std::shared_ptr<TensorDesc>
TensorDesc::create(infiniDtype_t dtype, const std::vector<size_t> &shape) {
    std::vector<ptrdiff_t> strides(shape.size());
    ptrdiff_t running = 1;
    // Walk from innermost to outermost, accumulating the element count.
    for (size_t i = shape.size(); i-- > 0;) {
        strides[i] = running;
        running *= static_cast<ptrdiff_t>(shape[i]);
    }
    return create(dtype, shape, strides);
}

// Create a descriptor for `shape` whose memory layout follows a permutation:
// order[j] is the layout rank of logical dimension j — the dimension whose
// rank is ndim-1 is innermost (stride 1) and rank 0 is outermost. Strides are
// dense in that permuted layout.
std::shared_ptr<TensorDesc>
TensorDesc::createWithOrder(infiniDtype_t dtype, const std::vector<size_t> &shape,
                            const std::vector<size_t> &order) {
    ASSERT_EQ(shape.size(), order.size());
    auto ndim = shape.size();
    if (ndim == 0) {
        return create(dtype, shape);
    }
    auto strides = std::vector<ptrdiff_t>(order.size());
    // The logical dim carrying rank ndim-1 in `order` is innermost: stride 1.
    auto idx = std::find(order.begin(), order.end(), size_t(ndim - 1));
    strides[std::distance(order.begin(), idx)] = 1;
    // Walk outward by rank: each dim's stride is the previously-placed
    // (inner) dim's stride times that dim's extent.
    for (int i = ndim - 2; i >= 0; i--) {
        auto prev_dim = shape[std::distance(order.begin(), idx)];
        auto prev_stride = strides[std::distance(order.begin(), idx)];
        idx = std::find(order.begin(), order.end(), size_t(i));
        strides[std::distance(order.begin(), idx)] = prev_stride * prev_dim;
    }
    return create(dtype, shape, strides);
}

PanZezhong's avatar
PanZezhong committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
infiniopTensorDescriptor_t TensorDesc::desc() const {
    if (_desc == nullptr) {
        RUN_INFINI(infiniopCreateTensorDescriptor(
            (infiniopTensorDescriptor_t *)(&_desc), _shape.size(), _shape.data(),
            _strides.data(), _dtype));
    }
    return _desc;
};

void TensorDesc::resetDesc() {
    if (this->_desc != nullptr) {
        infiniopDestroyTensorDescriptor(this->_desc);
        this->_desc = nullptr;
    }
}

// Return true if the strides describe a dense row-major layout for the shape.
bool TensorDesc::isContigous() const {
    auto ndim = this->ndim();
    // Fix: ndim == 0 previously underflowed `strides[ndim - 1]` (size_t
    // wraparound). A 0-dim tensor is trivially contiguous.
    if (ndim == 0) {
        return true;
    }
    const auto &shape = this->shape();
    ASSERT_EQ(ndim, this->_strides.size());
    // Compare against the expected contiguous stride, innermost first,
    // without materializing a temporary stride vector.
    ptrdiff_t expected = 1;
    for (size_t i = ndim; i-- > 0;) {
        if (this->_strides[i] != expected) {
            return false;
        }
        expected *= static_cast<ptrdiff_t>(shape[i]);
    }
    return true;
}

// Human-readable one-line summary: shape, strides, and dtype.
std::string TensorDesc::info() const {
    std::stringstream out;

    out << "Tensor: "
        << "shape[ ";
    for (auto dim : this->shape()) {
        out << dim << " ";
    }
    out << "] strides[ ";
    for (auto stride : this->strides()) {
        out << stride << " ";
    }
    out << "] dtype=" << this->dtype();

    return out.str();
}

PanZezhong's avatar
init  
PanZezhong committed
94
TensorDesc::~TensorDesc() {
PanZezhong's avatar
PanZezhong committed
95
    this->resetDesc();
PanZezhong's avatar
init  
PanZezhong committed
96
97
}

PanZezhong's avatar
PanZezhong committed
98
99
100
101
102
103
const std::vector<size_t> &Tensor::shape() const { return this->_desc->shape(); }
const std::vector<ptrdiff_t> &Tensor::strides() const { return this->_desc->strides(); }
size_t Tensor::ndim() const { return this->_desc->ndim(); }
infiniDtype_t Tensor::dtype() const { return this->_desc->dtype(); }
infiniDevice_t Tensor::deviceType() const { return this->_storage->deviceType(); }
int Tensor::deviceId() const { return this->_storage->deviceId(); }
PanZezhong's avatar
init  
PanZezhong committed
104
105
Tensor::~Tensor() {}

PanZezhong's avatar
PanZezhong committed
106
ptrdiff_t Tensor::dataOffset() const {
PanZezhong's avatar
PanZezhong committed
107
    return _offset;
PanZezhong's avatar
init  
PanZezhong committed
108
109
}

PanZezhong's avatar
PanZezhong committed
110
infiniopTensorDescriptor_t Tensor::desc() const { return _desc->desc(); }
PanZezhong's avatar
init  
PanZezhong committed
111
112
113

std::shared_ptr<Tensor> Tensor::buffer(infiniDtype_t dtype,
                                       const std::vector<size_t> &shape,
thatPepe's avatar
thatPepe committed
114
                                       std::shared_ptr<MemoryPool> pool) {
PanZezhong's avatar
init  
PanZezhong committed
115
116
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
PanZezhong's avatar
PanZezhong committed
117

PanZezhong's avatar
init  
PanZezhong committed
118
119
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
    auto strides = std::vector<ptrdiff_t>(ndim);
PanZezhong's avatar
PanZezhong committed
120
121
122
123
124
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
PanZezhong's avatar
init  
PanZezhong committed
125
    }
thatPepe's avatar
thatPepe committed
126
    tensor->_storage = Storage::createFromPool(size, pool);
PanZezhong's avatar
PanZezhong committed
127
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
PanZezhong's avatar
init  
PanZezhong committed
128
129
130
131
132
133
134
135
136
137
    tensor->_offset = 0;
    return tensor;
}

std::shared_ptr<Tensor> Tensor::weight(void *data, infiniDtype_t dtype,
                                       const std::vector<size_t> &shape) {
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
    auto strides = std::vector<ptrdiff_t>(ndim);
PanZezhong's avatar
PanZezhong committed
138
139
140
141
142
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
PanZezhong's avatar
init  
PanZezhong committed
143
    }
PanZezhong's avatar
PanZezhong committed
144

PanZezhong's avatar
PanZezhong committed
145
    tensor->_storage = Storage::create(size);
PanZezhong's avatar
PanZezhong committed
146
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
147
148
149
150
151
152
153
    // NOTE: 为兼容部分平台(沐曦)多线程并发对同一host数据执行memcpy卡死问题
    static std::mutex mutex;
    {
        std::lock_guard<std::mutex> lock(mutex);
        RUN_INFINI(infinirtMemcpy(tensor->_storage->memory(),
                                  data, size, INFINIRT_MEMCPY_H2D));
    }
PanZezhong's avatar
PanZezhong committed
154

PanZezhong's avatar
init  
PanZezhong committed
155
156
157
158
    tensor->_offset = 0;
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
159
160
std::shared_ptr<Tensor> Tensor::memShare(const std::vector<size_t> &shape, infiniDtype_t dtype_) const {
    auto dtype = dtype_ == INFINI_DTYPE_INVALID ? this->dtype() : dtype_;
161
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
PanZezhong's avatar
PanZezhong committed
162
    ASSERT(size <= this->_storage->size());
163
164
165
166
167
168
169
170
171
172
173
174

    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
    auto strides = std::vector<ptrdiff_t>(ndim);
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
    }
    tensor->_storage = this->_storage;
    tensor->_offset = 0;
PanZezhong's avatar
PanZezhong committed
175
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
176
177
178
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
179
void *Tensor::dataImpl(ptrdiff_t offset) const {
PanZezhong's avatar
PanZezhong committed
180
    return (char *)(this->_storage->memory()) + this->_offset + offset * dsize(this->dtype());
PanZezhong's avatar
init  
PanZezhong committed
181
182
183
}

void *Tensor::data(ptrdiff_t offset) {
PanZezhong's avatar
PanZezhong committed
184
    return this->dataImpl(offset);
PanZezhong's avatar
init  
PanZezhong committed
185
186
187
}

const void *Tensor::data(ptrdiff_t offset) const {
PanZezhong's avatar
PanZezhong committed
188
    return this->dataImpl(offset);
PanZezhong's avatar
init  
PanZezhong committed
189
190
}

PanZezhong's avatar
PanZezhong committed
191
192
void Tensor::copyFrom(std::shared_ptr<Tensor const> src,
                      infiniopHandle_t handle, infinirtStream_t stream) {
PanZezhong's avatar
init  
PanZezhong committed
193
194
195
196
    ASSERT_EQ(this->shape(), src->shape());
    ASSERT_EQ(this->dtype(), src->dtype());
    infiniopRearrangeDescriptor_t desc;
    RUN_INFINI(infiniopCreateRearrangeDescriptor(
PanZezhong's avatar
PanZezhong committed
197
        handle, &desc, this->desc(), src->desc()));
PanZezhong's avatar
init  
PanZezhong committed
198
199
200
201
202
    RUN_INFINI(infiniopRearrange(desc, this->data(), src->data(),
                                 stream));
    RUN_INFINI(infiniopDestroyRearrangeDescriptor(desc));
}

PanZezhong's avatar
PanZezhong committed
203
204
bool Tensor::isContigous() const {
    return this->_desc->isContigous();
PanZezhong's avatar
init  
PanZezhong committed
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
}

// Recursively print tensor contents to stdout, one line per innermost row,
// honoring `strides` for every dimension.
template <typename T>
void print_data(T *data, const std::vector<size_t> &shape,
                const std::vector<ptrdiff_t> &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            // Fix: index with the innermost stride. The old `data[i]` silently
            // assumed a contiguous last dimension, unlike the f16
            // specialization, and printed wrong values otherwise.
            std::cout << data[i * strides[dim]] << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data(data + i * strides[dim], shape, strides, dim + 1);
        }
    }
}

// Explicit specialization for raw f16 payloads: decode each half-precision
// value with f16_to_f32 before printing.
template <>
void print_data(uint16_t const *data, const std::vector<size_t> &shape,
                const std::vector<ptrdiff_t> &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        for (size_t idx = 0; idx < shape[dim]; idx++) {
            std::cout << f16_to_f32(data[idx * strides[dim]]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t idx = 0; idx < shape[dim]; idx++) {
            print_data(data + idx * strides[dim], shape, strides, dim + 1);
        }
    }
}

PanZezhong's avatar
PanZezhong committed
237
238
239
240
241
242
243
244
245
246
247
248
249
250
void print_data_bf16(uint16_t const *data, const std::vector<size_t> &shape,
                     const std::vector<ptrdiff_t> &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << bf16_to_f32(data[i * strides[dim]]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data(data + i * strides[dim], shape, strides, dim + 1);
        }
    }
}

PanZezhong's avatar
PanZezhong committed
251
252
253
254
std::string Tensor::info() const {
    std::stringstream ss;

    ss << "Tensor: "
PanZezhong's avatar
PanZezhong committed
255
       << this->_desc->info()
PanZezhong's avatar
PanZezhong committed
256
257
       << " device=" << this->deviceType()
       << " device_id=" << this->deviceId();
PanZezhong's avatar
PanZezhong committed
258
    return this->_desc->info();
PanZezhong's avatar
PanZezhong committed
259
260
}

// Reinterpret this tensor with `new_shape` without copying data, like
// PyTorch's Tensor.view. Supports a single size_t(-1) entry whose extent is
// inferred. For contiguous tensors, fresh row-major strides are computed;
// for non-contiguous tensors, old dimensions are merged/split into new
// strides where the layout allows it (asserts otherwise).
std::shared_ptr<Tensor> Tensor::view(const std::vector<size_t> &new_shape) const {
    // Calculate total number of elements
    size_t numel = 1;
    for (auto s : shape()) {
        numel *= s;
    }

    size_t new_numel = 1;
    for (auto s : new_shape) {
        new_numel *= s;
    }

    // NOTE(review): this assert runs before the -1 sentinel below is
    // resolved, so when new_shape contains size_t(-1) the product wraps and
    // numel != new_numel — the inference path can only be reached if ASSERT
    // is compiled out. Consider moving this check after inference — TODO confirm.
    ASSERT(numel == new_numel);

    // Handle empty tensors
    if (numel == 0) {
        return this->view_as(new_shape, {});
    }

    // Special case: view(-1) flattens the tensor
    if (new_shape.size() == 1 && new_shape[0] == static_cast<size_t>(-1)) {
        std::vector<size_t> flat_shape = {numel};
        return this->view_as(flat_shape, {});
    }

    // Check for -1 in new_shape (infer dimension)
    std::vector<size_t> inferred_shape = new_shape;
    size_t infer_index = static_cast<size_t>(-1);
    size_t known_elements = 1;

    for (size_t i = 0; i < new_shape.size(); ++i) {
        if (new_shape[i] == static_cast<size_t>(-1)) {
            ASSERT(infer_index == static_cast<size_t>(-1)); // Only one -1 allowed
            infer_index = i;
        } else {
            known_elements *= new_shape[i];
        }
    }

    // Resolve the inferred dimension: total elements must divide evenly.
    if (infer_index != static_cast<size_t>(-1)) {
        ASSERT(numel % known_elements == 0);
        inferred_shape[infer_index] = numel / known_elements;
    }

    // For contiguous tensors, compute standard row-major strides
    if (this->isContigous()) {
        std::vector<ptrdiff_t> new_strides(inferred_shape.size());
        if (!inferred_shape.empty()) {
            new_strides.back() = 1;
            for (int i = static_cast<int>(inferred_shape.size()) - 2; i >= 0; --i) {
                new_strides[i] = new_strides[i + 1] * static_cast<ptrdiff_t>(inferred_shape[i + 1]);
            }
        }
        return this->view_as(inferred_shape, new_strides);
    }

    // For non-contiguous tensors: walk both shapes from the innermost dim,
    // matching/merging/splitting dimensions. Indices count down and wrap to
    // size_t(-1) as the "done" sentinel.
    std::vector<size_t> old_shape = shape();
    std::vector<ptrdiff_t> old_strides = strides();
    std::vector<ptrdiff_t> new_strides(inferred_shape.size(), 0);

    size_t old_idx = old_shape.size() - 1;
    size_t new_idx = inferred_shape.size() - 1;

    if (new_idx != static_cast<size_t>(-1)) {
        new_strides[new_idx] = 1;
    }

    while (old_idx != static_cast<size_t>(-1) && new_idx != static_cast<size_t>(-1)) {
        size_t old_size = old_shape[old_idx];
        size_t new_size = inferred_shape[new_idx];

        if (old_size == 1) {
            // Size-1 old dims carry no layout information; skip.
            old_idx--;
        } else if (new_size == 1) {
            // Size-1 new dims inherit the stride of the dim to their right.
            new_strides[new_idx] = (new_idx == inferred_shape.size() - 1) ? 1 : new_strides[new_idx + 1];
            new_idx--;
        } else if (old_size == new_size) {
            // Exact match: carry the stride across unchanged.
            new_strides[new_idx] = old_strides[old_idx];
            old_idx--;
            new_idx--;
        } else if (old_size < new_size) {
            // Merge consecutive old dims into one new dim; only legal if the
            // merged dims are laid out contiguously relative to each other.
            size_t combined_size = old_size;
            ptrdiff_t combined_stride = old_strides[old_idx];
            old_idx--;

            while (old_idx != static_cast<size_t>(-1) && combined_size < new_size) {
                ASSERT(static_cast<size_t>(old_strides[old_idx]) == old_shape[old_idx + 1] * static_cast<size_t>(old_strides[old_idx + 1]));
                combined_size *= old_shape[old_idx];
                combined_stride = old_strides[old_idx];
                old_idx--;
            }

            ASSERT(combined_size == new_size);
            new_strides[new_idx] = combined_stride;
            new_idx--;
        } else {
            // Split one old dim into (at most) two new dims.
            size_t remaining_size = old_size / new_size;
            ASSERT(old_size % new_size == 0);
            new_strides[new_idx] = old_strides[old_idx] * static_cast<ptrdiff_t>(remaining_size);
            new_idx--;

            if (remaining_size != 1) {
                if (new_idx != static_cast<size_t>(-1)) {
                    inferred_shape[new_idx] = remaining_size;
                    new_strides[new_idx] = old_strides[old_idx];
                    new_idx--;
                } else {
                    // No new dim left to hold the remainder: invalid view.
                    ASSERT(false);
                }
            }
            old_idx--;
        }
    }

    // Fill remaining dimensions (must be size 1)
    while (new_idx != static_cast<size_t>(-1)) {
        ASSERT(inferred_shape[new_idx] == 1);
        new_strides[new_idx] = new_strides[new_idx + 1];
        new_idx--;
    }

    return this->view_as(inferred_shape, new_strides);
}

386
387
388
389
390
391
392
393
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape, const std::vector<ptrdiff_t> &new_strides) const {
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    tensor->_storage = this->_storage;
    tensor->_desc = TensorDesc::create(this->dtype(), new_shape, new_strides);
    tensor->_offset = this->_offset;
    return tensor;
}

394
395
396
397
398
399
400
401
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape) const {
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    tensor->_storage = this->_storage;
    tensor->_desc = TensorDesc::create(this->dtype(), new_shape);
    tensor->_offset = this->_offset;
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
402
void Tensor::debug(const std::string &filename) const {
PanZezhong's avatar
PanZezhong committed
403
404
    RUN_INFINI(infinirtDeviceSynchronize());

PanZezhong's avatar
PanZezhong committed
405
    std::cout << info() << std::endl;
PanZezhong's avatar
PanZezhong committed
406

PanZezhong's avatar
init  
PanZezhong committed
407
    void const *cpu_data;
PanZezhong's avatar
PanZezhong committed
408
    if (this->deviceType() != INFINI_DEVICE_CPU) {
PanZezhong's avatar
PanZezhong committed
409
410
411
        void *cpu_memory = std::malloc(this->_storage->size());
        RUN_INFINI(infinirtMemcpy(cpu_memory, this->_storage->memory(),
                                  this->_storage->size(), INFINIRT_MEMCPY_D2H));
PanZezhong's avatar
init  
PanZezhong committed
412
413
        cpu_data = cpu_memory;
    } else {
PanZezhong's avatar
PanZezhong committed
414
        cpu_data = this->_storage->memory();
PanZezhong's avatar
init  
PanZezhong committed
415
416
417
418
419
420
421
422
    }

    if (!filename.empty()) {
        std::ofstream outFile(filename, std::ios::binary);
        if (!outFile) {
            std::cerr << "Error opening file for writing: " << filename << "\n";
            return;
        }
PanZezhong's avatar
PanZezhong committed
423
        outFile.write(reinterpret_cast<const char *>(cpu_data), this->_storage->size());
PanZezhong's avatar
init  
PanZezhong committed
424
425
426
427
428
        outFile.close();
        std::cout << "Data written to file: " << filename << "\n";
        return;
    }

PanZezhong's avatar
PanZezhong committed
429
    switch (this->dtype()) {
PanZezhong's avatar
init  
PanZezhong committed
430
    case INFINI_DTYPE_F16:
PanZezhong's avatar
PanZezhong committed
431
        print_data((uint16_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
432
433
434
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_F32:
PanZezhong's avatar
PanZezhong committed
435
        print_data((float const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
436
437
438
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_U64:
PanZezhong's avatar
PanZezhong committed
439
        print_data((uint64_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
440
441
442
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_I64:
PanZezhong's avatar
PanZezhong committed
443
        print_data((int64_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
444
445
446
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_U32:
PanZezhong's avatar
PanZezhong committed
447
        print_data((uint32_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
448
449
450
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_I32:
PanZezhong's avatar
PanZezhong committed
451
        print_data((int32_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
452
453
                   this->shape(), this->strides(), 0);
        break;
PanZezhong's avatar
PanZezhong committed
454
455
456
457
    case INFINI_DTYPE_BF16:
        print_data_bf16((uint16_t const *)((char const *)cpu_data + dataOffset()),
                        this->shape(), this->strides(), 0);
        break;
PanZezhong's avatar
init  
PanZezhong committed
458
459
460
461
462
463
    default:
        PANIC("Unsupported data type");
    }
}

void Tensor::debug() const { this->debug(""); }